use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;
12
13/// Computes the absolute values of packed 32-bit integers in `a`.
14///
15/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
16#[inline]
17#[target_feature(enable = "avx512f")]
18#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19#[cfg_attr(test, assert_instr(vpabsd))]
20#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21pub const fn _mm512_abs_epi32(a: __m512i) -> __m512i {
22 unsafe {
23 let a: Simd = a.as_i32x16();
24 let r: Simd = simd_select::<i32x16, _>(mask:simd_lt(a, i32x16::ZERO), if_true:simd_neg(a), if_false:a);
25 transmute(src:r)
26 }
27}
28
29/// Computes the absolute value of packed 32-bit integers in `a`, and store the
30/// unsigned results in `dst` using writemask `k` (elements are copied from
31/// `src` when the corresponding mask bit is not set).
32///
33/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
34#[inline]
35#[target_feature(enable = "avx512f")]
36#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37#[cfg_attr(test, assert_instr(vpabsd))]
38#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39pub const fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
40 unsafe {
41 let abs: Simd = _mm512_abs_epi32(a).as_i32x16();
42 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i32x16()))
43 }
44}
45
46/// Computes the absolute value of packed 32-bit integers in `a`, and store the
47/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
48/// the corresponding mask bit is not set).
49///
50/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
51#[inline]
52#[target_feature(enable = "avx512f")]
53#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
54#[cfg_attr(test, assert_instr(vpabsd))]
55#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
56pub const fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
57 unsafe {
58 let abs: Simd = _mm512_abs_epi32(a).as_i32x16();
59 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i32x16::ZERO))
60 }
61}
62
63/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
64///
65/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
66#[inline]
67#[target_feature(enable = "avx512f,avx512vl")]
68#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
69#[cfg_attr(test, assert_instr(vpabsd))]
70#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
71pub const fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
72 unsafe {
73 let abs: Simd = _mm256_abs_epi32(a).as_i32x8();
74 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i32x8()))
75 }
76}
77
78/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
79///
80/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
81#[inline]
82#[target_feature(enable = "avx512f,avx512vl")]
83#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
84#[cfg_attr(test, assert_instr(vpabsd))]
85#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
86pub const fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
87 unsafe {
88 let abs: Simd = _mm256_abs_epi32(a).as_i32x8();
89 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i32x8::ZERO))
90 }
91}
92
93/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
94///
95/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
96#[inline]
97#[target_feature(enable = "avx512f,avx512vl")]
98#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
99#[cfg_attr(test, assert_instr(vpabsd))]
100#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
101pub const fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
102 unsafe {
103 let abs: Simd = _mm_abs_epi32(a).as_i32x4();
104 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i32x4()))
105 }
106}
107
108/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
109///
110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
111#[inline]
112#[target_feature(enable = "avx512f,avx512vl")]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114#[cfg_attr(test, assert_instr(vpabsd))]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
117 unsafe {
118 let abs: Simd = _mm_abs_epi32(a).as_i32x4();
119 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i32x4::ZERO))
120 }
121}
122
123/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
124///
125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
126#[inline]
127#[target_feature(enable = "avx512f")]
128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
129#[cfg_attr(test, assert_instr(vpabsq))]
130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
131pub const fn _mm512_abs_epi64(a: __m512i) -> __m512i {
132 unsafe {
133 let a: Simd = a.as_i64x8();
134 let r: Simd = simd_select::<i64x8, _>(mask:simd_lt(a, i64x8::ZERO), if_true:simd_neg(a), if_false:a);
135 transmute(src:r)
136 }
137}
138
139/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
140///
141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
142#[inline]
143#[target_feature(enable = "avx512f")]
144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
145#[cfg_attr(test, assert_instr(vpabsq))]
146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
147pub const fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
148 unsafe {
149 let abs: Simd = _mm512_abs_epi64(a).as_i64x8();
150 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i64x8()))
151 }
152}
153
154/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
155///
156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
157#[inline]
158#[target_feature(enable = "avx512f")]
159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
160#[cfg_attr(test, assert_instr(vpabsq))]
161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
162pub const fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
163 unsafe {
164 let abs: Simd = _mm512_abs_epi64(a).as_i64x8();
165 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i64x8::ZERO))
166 }
167}
168
169/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
170///
171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
172#[inline]
173#[target_feature(enable = "avx512f,avx512vl")]
174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
175#[cfg_attr(test, assert_instr(vpabsq))]
176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
177pub const fn _mm256_abs_epi64(a: __m256i) -> __m256i {
178 unsafe {
179 let a: Simd = a.as_i64x4();
180 let r: Simd = simd_select::<i64x4, _>(mask:simd_lt(a, i64x4::ZERO), if_true:simd_neg(a), if_false:a);
181 transmute(src:r)
182 }
183}
184
185/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
186///
187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
188#[inline]
189#[target_feature(enable = "avx512f,avx512vl")]
190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
191#[cfg_attr(test, assert_instr(vpabsq))]
192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
193pub const fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
194 unsafe {
195 let abs: Simd = _mm256_abs_epi64(a).as_i64x4();
196 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i64x4()))
197 }
198}
199
200/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
201///
202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
203#[inline]
204#[target_feature(enable = "avx512f,avx512vl")]
205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
206#[cfg_attr(test, assert_instr(vpabsq))]
207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
208pub const fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
209 unsafe {
210 let abs: Simd = _mm256_abs_epi64(a).as_i64x4();
211 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i64x4::ZERO))
212 }
213}
214
215/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
216///
217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
218#[inline]
219#[target_feature(enable = "avx512f,avx512vl")]
220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
221#[cfg_attr(test, assert_instr(vpabsq))]
222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
223pub const fn _mm_abs_epi64(a: __m128i) -> __m128i {
224 unsafe {
225 let a: Simd = a.as_i64x2();
226 let r: Simd = simd_select::<i64x2, _>(mask:simd_lt(a, i64x2::ZERO), if_true:simd_neg(a), if_false:a);
227 transmute(src:r)
228 }
229}
230
231/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
232///
233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
234#[inline]
235#[target_feature(enable = "avx512f,avx512vl")]
236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
237#[cfg_attr(test, assert_instr(vpabsq))]
238#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
239pub const fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
240 unsafe {
241 let abs: Simd = _mm_abs_epi64(a).as_i64x2();
242 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i64x2()))
243 }
244}
245
246/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
247///
248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
249#[inline]
250#[target_feature(enable = "avx512f,avx512vl")]
251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
252#[cfg_attr(test, assert_instr(vpabsq))]
253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
254pub const fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
255 unsafe {
256 let abs: Simd = _mm_abs_epi64(a).as_i64x2();
257 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i64x2::ZERO))
258 }
259}
260
261/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
264#[inline]
265#[target_feature(enable = "avx512f")]
266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
267#[cfg_attr(test, assert_instr(vpandd))]
268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
269pub const fn _mm512_abs_ps(v2: __m512) -> __m512 {
270 unsafe { simd_fabs(v2) }
271}
272
273/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
274///
275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
276#[inline]
277#[target_feature(enable = "avx512f")]
278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
279#[cfg_attr(test, assert_instr(vpandd))]
280#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
281pub const fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
282 unsafe { simd_select_bitmask(m:k, yes:simd_fabs(v2), no:src) }
283}
284
285/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
286///
287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
288#[inline]
289#[target_feature(enable = "avx512f")]
290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
291#[cfg_attr(test, assert_instr(vpandq))]
292#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
293pub const fn _mm512_abs_pd(v2: __m512d) -> __m512d {
294 unsafe { simd_fabs(v2) }
295}
296
297/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
298///
299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
300#[inline]
301#[target_feature(enable = "avx512f")]
302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
303#[cfg_attr(test, assert_instr(vpandq))]
304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
305pub const fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
306 unsafe { simd_select_bitmask(m:k, yes:simd_fabs(v2), no:src) }
307}
308
309/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
312#[inline]
313#[target_feature(enable = "avx512f")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vmovdqa32))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
318 unsafe {
319 let mov: Simd = a.as_i32x16();
320 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i32x16()))
321 }
322}
323
324/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
327#[inline]
328#[target_feature(enable = "avx512f")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vmovdqa32))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
333 unsafe {
334 let mov: Simd = a.as_i32x16();
335 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i32x16::ZERO))
336 }
337}
338
339/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
340///
341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
342#[inline]
343#[target_feature(enable = "avx512f,avx512vl")]
344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
345#[cfg_attr(test, assert_instr(vmovdqa32))]
346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
347pub const fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
348 unsafe {
349 let mov: Simd = a.as_i32x8();
350 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i32x8()))
351 }
352}
353
354/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
355///
356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
357#[inline]
358#[target_feature(enable = "avx512f,avx512vl")]
359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
360#[cfg_attr(test, assert_instr(vmovdqa32))]
361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
362pub const fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
363 unsafe {
364 let mov: Simd = a.as_i32x8();
365 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i32x8::ZERO))
366 }
367}
368
369/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
370///
371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
372#[inline]
373#[target_feature(enable = "avx512f,avx512vl")]
374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
375#[cfg_attr(test, assert_instr(vmovdqa32))]
376#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
377pub const fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
378 unsafe {
379 let mov: Simd = a.as_i32x4();
380 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i32x4()))
381 }
382}
383
384/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
385///
386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
387#[inline]
388#[target_feature(enable = "avx512f,avx512vl")]
389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
390#[cfg_attr(test, assert_instr(vmovdqa32))]
391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
392pub const fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
393 unsafe {
394 let mov: Simd = a.as_i32x4();
395 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i32x4::ZERO))
396 }
397}
398
399/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
400///
401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
402#[inline]
403#[target_feature(enable = "avx512f")]
404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
405#[cfg_attr(test, assert_instr(vmovdqa64))]
406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
407pub const fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
408 unsafe {
409 let mov: Simd = a.as_i64x8();
410 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i64x8()))
411 }
412}
413
414/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
415///
416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
417#[inline]
418#[target_feature(enable = "avx512f")]
419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
420#[cfg_attr(test, assert_instr(vmovdqa64))]
421#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
422pub const fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
423 unsafe {
424 let mov: Simd = a.as_i64x8();
425 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i64x8::ZERO))
426 }
427}
428
429/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
430///
431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
432#[inline]
433#[target_feature(enable = "avx512f,avx512vl")]
434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
435#[cfg_attr(test, assert_instr(vmovdqa64))]
436#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
437pub const fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
438 unsafe {
439 let mov: Simd = a.as_i64x4();
440 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i64x4()))
441 }
442}
443
444/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
445///
446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
447#[inline]
448#[target_feature(enable = "avx512f,avx512vl")]
449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
450#[cfg_attr(test, assert_instr(vmovdqa64))]
451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
452pub const fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
453 unsafe {
454 let mov: Simd = a.as_i64x4();
455 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i64x4::ZERO))
456 }
457}
458
459/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
460///
461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
462#[inline]
463#[target_feature(enable = "avx512f,avx512vl")]
464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
465#[cfg_attr(test, assert_instr(vmovdqa64))]
466#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
467pub const fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
468 unsafe {
469 let mov: Simd = a.as_i64x2();
470 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i64x2()))
471 }
472}
473
474/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
475///
476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
477#[inline]
478#[target_feature(enable = "avx512f,avx512vl")]
479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
480#[cfg_attr(test, assert_instr(vmovdqa64))]
481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
482pub const fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
483 unsafe {
484 let mov: Simd = a.as_i64x2();
485 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i64x2::ZERO))
486 }
487}
488
489/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
490///
491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
492#[inline]
493#[target_feature(enable = "avx512f")]
494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
495#[cfg_attr(test, assert_instr(vmovaps))]
496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
497pub const fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
498 unsafe {
499 let mov: Simd = a.as_f32x16();
500 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x16()))
501 }
502}
503
504/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
505///
506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
507#[inline]
508#[target_feature(enable = "avx512f")]
509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
510#[cfg_attr(test, assert_instr(vmovaps))]
511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
512pub const fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
513 unsafe {
514 let mov: Simd = a.as_f32x16();
515 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x16::ZERO))
516 }
517}
518
519/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
520///
521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
522#[inline]
523#[target_feature(enable = "avx512f,avx512vl")]
524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
525#[cfg_attr(test, assert_instr(vmovaps))]
526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
527pub const fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
528 unsafe {
529 let mov: Simd = a.as_f32x8();
530 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x8()))
531 }
532}
533
534/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
535///
536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
537#[inline]
538#[target_feature(enable = "avx512f,avx512vl")]
539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
540#[cfg_attr(test, assert_instr(vmovaps))]
541#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
542pub const fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
543 unsafe {
544 let mov: Simd = a.as_f32x8();
545 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x8::ZERO))
546 }
547}
548
549/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
550///
551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
552#[inline]
553#[target_feature(enable = "avx512f,avx512vl")]
554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
555#[cfg_attr(test, assert_instr(vmovaps))]
556#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
557pub const fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
558 unsafe {
559 let mov: Simd = a.as_f32x4();
560 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x4()))
561 }
562}
563
564/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
565///
566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
567#[inline]
568#[target_feature(enable = "avx512f,avx512vl")]
569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
570#[cfg_attr(test, assert_instr(vmovaps))]
571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
572pub const fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
573 unsafe {
574 let mov: Simd = a.as_f32x4();
575 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x4::ZERO))
576 }
577}
578
579/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
580///
581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
582#[inline]
583#[target_feature(enable = "avx512f")]
584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
585#[cfg_attr(test, assert_instr(vmovapd))]
586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
587pub const fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
588 unsafe {
589 let mov: Simd = a.as_f64x8();
590 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x8()))
591 }
592}
593
594/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
595///
596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
597#[inline]
598#[target_feature(enable = "avx512f")]
599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
600#[cfg_attr(test, assert_instr(vmovapd))]
601#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
602pub const fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
603 unsafe {
604 let mov: Simd = a.as_f64x8();
605 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x8::ZERO))
606 }
607}
608
609/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
610///
611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
612#[inline]
613#[target_feature(enable = "avx512f,avx512vl")]
614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
615#[cfg_attr(test, assert_instr(vmovapd))]
616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
617pub const fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
618 unsafe {
619 let mov: Simd = a.as_f64x4();
620 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x4()))
621 }
622}
623
624/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
625///
626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
627#[inline]
628#[target_feature(enable = "avx512f,avx512vl")]
629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
630#[cfg_attr(test, assert_instr(vmovapd))]
631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
632pub const fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
633 unsafe {
634 let mov: Simd = a.as_f64x4();
635 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x4::ZERO))
636 }
637}
638
639/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
640///
641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
642#[inline]
643#[target_feature(enable = "avx512f,avx512vl")]
644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
645#[cfg_attr(test, assert_instr(vmovapd))]
646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
647pub const fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
648 unsafe {
649 let mov: Simd = a.as_f64x2();
650 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x2()))
651 }
652}
653
654/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
655///
656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
657#[inline]
658#[target_feature(enable = "avx512f,avx512vl")]
659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
660#[cfg_attr(test, assert_instr(vmovapd))]
661#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
662pub const fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
663 unsafe {
664 let mov: Simd = a.as_f64x2();
665 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x2::ZERO))
666 }
667}
668
669/// Add packed 32-bit integers in a and b, and store the results in dst.
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
672#[inline]
673#[target_feature(enable = "avx512f")]
674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
675#[cfg_attr(test, assert_instr(vpaddd))]
676#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
677pub const fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
678 unsafe { transmute(src:simd_add(x:a.as_i32x16(), y:b.as_i32x16())) }
679}
680
681/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
682///
683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
684#[inline]
685#[target_feature(enable = "avx512f")]
686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
687#[cfg_attr(test, assert_instr(vpaddd))]
688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
689pub const fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
690 unsafe {
691 let add: Simd = _mm512_add_epi32(a, b).as_i32x16();
692 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i32x16()))
693 }
694}
695
696/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
697///
698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
699#[inline]
700#[target_feature(enable = "avx512f")]
701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
702#[cfg_attr(test, assert_instr(vpaddd))]
703#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
704pub const fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
705 unsafe {
706 let add: Simd = _mm512_add_epi32(a, b).as_i32x16();
707 transmute(src:simd_select_bitmask(m:k, yes:add, no:i32x16::ZERO))
708 }
709}
710
711/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
712///
713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
714#[inline]
715#[target_feature(enable = "avx512f,avx512vl")]
716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
717#[cfg_attr(test, assert_instr(vpaddd))]
718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
719pub const fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
720 unsafe {
721 let add: Simd = _mm256_add_epi32(a, b).as_i32x8();
722 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i32x8()))
723 }
724}
725
726/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
727///
728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
729#[inline]
730#[target_feature(enable = "avx512f,avx512vl")]
731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
732#[cfg_attr(test, assert_instr(vpaddd))]
733#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
734pub const fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
735 unsafe {
736 let add: Simd = _mm256_add_epi32(a, b).as_i32x8();
737 transmute(src:simd_select_bitmask(m:k, yes:add, no:i32x8::ZERO))
738 }
739}
740
741/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
742///
743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
744#[inline]
745#[target_feature(enable = "avx512f,avx512vl")]
746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
747#[cfg_attr(test, assert_instr(vpaddd))]
748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
749pub const fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
750 unsafe {
751 let add: Simd = _mm_add_epi32(a, b).as_i32x4();
752 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i32x4()))
753 }
754}
755
756/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
757///
758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
759#[inline]
760#[target_feature(enable = "avx512f,avx512vl")]
761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
762#[cfg_attr(test, assert_instr(vpaddd))]
763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
764pub const fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
765 unsafe {
766 let add: Simd = _mm_add_epi32(a, b).as_i32x4();
767 transmute(src:simd_select_bitmask(m:k, yes:add, no:i32x4::ZERO))
768 }
769}
770
771/// Add packed 64-bit integers in a and b, and store the results in dst.
772///
773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
774#[inline]
775#[target_feature(enable = "avx512f")]
776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
777#[cfg_attr(test, assert_instr(vpaddq))]
778#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
779pub const fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
780 unsafe { transmute(src:simd_add(x:a.as_i64x8(), y:b.as_i64x8())) }
781}
782
783/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
784///
785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
786#[inline]
787#[target_feature(enable = "avx512f")]
788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
789#[cfg_attr(test, assert_instr(vpaddq))]
790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
791pub const fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
792 unsafe {
793 let add: Simd = _mm512_add_epi64(a, b).as_i64x8();
794 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i64x8()))
795 }
796}
797
798/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
799///
800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
801#[inline]
802#[target_feature(enable = "avx512f")]
803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
804#[cfg_attr(test, assert_instr(vpaddq))]
805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
806pub const fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
807 unsafe {
808 let add: Simd = _mm512_add_epi64(a, b).as_i64x8();
809 transmute(src:simd_select_bitmask(m:k, yes:add, no:i64x8::ZERO))
810 }
811}
812
813/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
814///
815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
816#[inline]
817#[target_feature(enable = "avx512f,avx512vl")]
818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
819#[cfg_attr(test, assert_instr(vpaddq))]
820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
821pub const fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
822 unsafe {
823 let add: Simd = _mm256_add_epi64(a, b).as_i64x4();
824 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i64x4()))
825 }
826}
827
828/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
829///
830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
831#[inline]
832#[target_feature(enable = "avx512f,avx512vl")]
833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
834#[cfg_attr(test, assert_instr(vpaddq))]
835#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
836pub const fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
837 unsafe {
838 let add: Simd = _mm256_add_epi64(a, b).as_i64x4();
839 transmute(src:simd_select_bitmask(m:k, yes:add, no:i64x4::ZERO))
840 }
841}
842
843/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
844///
845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
846#[inline]
847#[target_feature(enable = "avx512f,avx512vl")]
848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
849#[cfg_attr(test, assert_instr(vpaddq))]
850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
851pub const fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
852 unsafe {
853 let add: Simd = _mm_add_epi64(a, b).as_i64x2();
854 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i64x2()))
855 }
856}
857
858/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
859///
860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
861#[inline]
862#[target_feature(enable = "avx512f,avx512vl")]
863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
864#[cfg_attr(test, assert_instr(vpaddq))]
865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
866pub const fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
867 unsafe {
868 let add: Simd = _mm_add_epi64(a, b).as_i64x2();
869 transmute(src:simd_select_bitmask(m:k, yes:add, no:i64x2::ZERO))
870 }
871}
872
873/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
874///
875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
876#[inline]
877#[target_feature(enable = "avx512f")]
878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
879#[cfg_attr(test, assert_instr(vaddps))]
880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
881pub const fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
882 unsafe { transmute(src:simd_add(x:a.as_f32x16(), y:b.as_f32x16())) }
883}
884
885/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
886///
887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
888#[inline]
889#[target_feature(enable = "avx512f")]
890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
891#[cfg_attr(test, assert_instr(vaddps))]
892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
893pub const fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
894 unsafe {
895 let add: Simd = _mm512_add_ps(a, b).as_f32x16();
896 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f32x16()))
897 }
898}
899
900/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
901///
902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
903#[inline]
904#[target_feature(enable = "avx512f")]
905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
906#[cfg_attr(test, assert_instr(vaddps))]
907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
908pub const fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
909 unsafe {
910 let add: Simd = _mm512_add_ps(a, b).as_f32x16();
911 transmute(src:simd_select_bitmask(m:k, yes:add, no:f32x16::ZERO))
912 }
913}
914
915/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
916///
917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
918#[inline]
919#[target_feature(enable = "avx512f,avx512vl")]
920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
921#[cfg_attr(test, assert_instr(vaddps))]
922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
923pub const fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
924 unsafe {
925 let add: Simd = _mm256_add_ps(a, b).as_f32x8();
926 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f32x8()))
927 }
928}
929
930/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
931///
932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
933#[inline]
934#[target_feature(enable = "avx512f,avx512vl")]
935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
936#[cfg_attr(test, assert_instr(vaddps))]
937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
938pub const fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
939 unsafe {
940 let add: Simd = _mm256_add_ps(a, b).as_f32x8();
941 transmute(src:simd_select_bitmask(m:k, yes:add, no:f32x8::ZERO))
942 }
943}
944
945/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
946///
947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
948#[inline]
949#[target_feature(enable = "avx512f,avx512vl")]
950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
951#[cfg_attr(test, assert_instr(vaddps))]
952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
953pub const fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
954 unsafe {
955 let add: Simd = _mm_add_ps(a, b).as_f32x4();
956 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f32x4()))
957 }
958}
959
960/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
961///
962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
963#[inline]
964#[target_feature(enable = "avx512f,avx512vl")]
965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
966#[cfg_attr(test, assert_instr(vaddps))]
967#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
968pub const fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
969 unsafe {
970 let add: Simd = _mm_add_ps(a, b).as_f32x4();
971 transmute(src:simd_select_bitmask(m:k, yes:add, no:f32x4::ZERO))
972 }
973}
974
975/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
976///
977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
978#[inline]
979#[target_feature(enable = "avx512f")]
980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
981#[cfg_attr(test, assert_instr(vaddpd))]
982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
983pub const fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
984 unsafe { transmute(src:simd_add(x:a.as_f64x8(), y:b.as_f64x8())) }
985}
986
987/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
988///
989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
990#[inline]
991#[target_feature(enable = "avx512f")]
992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
993#[cfg_attr(test, assert_instr(vaddpd))]
994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
995pub const fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
996 unsafe {
997 let add: Simd = _mm512_add_pd(a, b).as_f64x8();
998 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f64x8()))
999 }
1000}
1001
1002/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1003///
1004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
1005#[inline]
1006#[target_feature(enable = "avx512f")]
1007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1008#[cfg_attr(test, assert_instr(vaddpd))]
1009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1010pub const fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1011 unsafe {
1012 let add: Simd = _mm512_add_pd(a, b).as_f64x8();
1013 transmute(src:simd_select_bitmask(m:k, yes:add, no:f64x8::ZERO))
1014 }
1015}
1016
1017/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1018///
1019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
1020#[inline]
1021#[target_feature(enable = "avx512f,avx512vl")]
1022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1023#[cfg_attr(test, assert_instr(vaddpd))]
1024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1025pub const fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1026 unsafe {
1027 let add: Simd = _mm256_add_pd(a, b).as_f64x4();
1028 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f64x4()))
1029 }
1030}
1031
1032/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1033///
1034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
1035#[inline]
1036#[target_feature(enable = "avx512f,avx512vl")]
1037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1038#[cfg_attr(test, assert_instr(vaddpd))]
1039#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1040pub const fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1041 unsafe {
1042 let add: Simd = _mm256_add_pd(a, b).as_f64x4();
1043 transmute(src:simd_select_bitmask(m:k, yes:add, no:f64x4::ZERO))
1044 }
1045}
1046
1047/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1048///
1049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
1050#[inline]
1051#[target_feature(enable = "avx512f,avx512vl")]
1052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1053#[cfg_attr(test, assert_instr(vaddpd))]
1054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1055pub const fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1056 unsafe {
1057 let add: Simd = _mm_add_pd(a, b).as_f64x2();
1058 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f64x2()))
1059 }
1060}
1061
1062/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
1065#[inline]
1066#[target_feature(enable = "avx512f,avx512vl")]
1067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1068#[cfg_attr(test, assert_instr(vaddpd))]
1069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1070pub const fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1071 unsafe {
1072 let add: Simd = _mm_add_pd(a, b).as_f64x2();
1073 transmute(src:simd_select_bitmask(m:k, yes:add, no:f64x2::ZERO))
1074 }
1075}
1076
1077/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
1078///
1079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
1080#[inline]
1081#[target_feature(enable = "avx512f")]
1082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1083#[cfg_attr(test, assert_instr(vpsubd))]
1084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1085pub const fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1086 unsafe { transmute(src:simd_sub(lhs:a.as_i32x16(), rhs:b.as_i32x16())) }
1087}
1088
1089/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1090///
1091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1092#[inline]
1093#[target_feature(enable = "avx512f")]
1094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1095#[cfg_attr(test, assert_instr(vpsubd))]
1096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1097pub const fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1098 unsafe {
1099 let sub: Simd = _mm512_sub_epi32(a, b).as_i32x16();
1100 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x16()))
1101 }
1102}
1103
1104/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1105///
1106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1107#[inline]
1108#[target_feature(enable = "avx512f")]
1109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1110#[cfg_attr(test, assert_instr(vpsubd))]
1111#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1112pub const fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1113 unsafe {
1114 let sub: Simd = _mm512_sub_epi32(a, b).as_i32x16();
1115 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x16::ZERO))
1116 }
1117}
1118
1119/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1120///
1121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1122#[inline]
1123#[target_feature(enable = "avx512f,avx512vl")]
1124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1125#[cfg_attr(test, assert_instr(vpsubd))]
1126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1127pub const fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1128 unsafe {
1129 let sub: Simd = _mm256_sub_epi32(a, b).as_i32x8();
1130 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x8()))
1131 }
1132}
1133
1134/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1135///
1136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1137#[inline]
1138#[target_feature(enable = "avx512f,avx512vl")]
1139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1140#[cfg_attr(test, assert_instr(vpsubd))]
1141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1142pub const fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1143 unsafe {
1144 let sub: Simd = _mm256_sub_epi32(a, b).as_i32x8();
1145 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x8::ZERO))
1146 }
1147}
1148
1149/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1150///
1151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1152#[inline]
1153#[target_feature(enable = "avx512f,avx512vl")]
1154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1155#[cfg_attr(test, assert_instr(vpsubd))]
1156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1157pub const fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1158 unsafe {
1159 let sub: Simd = _mm_sub_epi32(a, b).as_i32x4();
1160 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x4()))
1161 }
1162}
1163
1164/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1165///
1166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1167#[inline]
1168#[target_feature(enable = "avx512f,avx512vl")]
1169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1170#[cfg_attr(test, assert_instr(vpsubd))]
1171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1172pub const fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1173 unsafe {
1174 let sub: Simd = _mm_sub_epi32(a, b).as_i32x4();
1175 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x4::ZERO))
1176 }
1177}
1178
1179/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1180///
1181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1182#[inline]
1183#[target_feature(enable = "avx512f")]
1184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1185#[cfg_attr(test, assert_instr(vpsubq))]
1186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1187pub const fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1188 unsafe { transmute(src:simd_sub(lhs:a.as_i64x8(), rhs:b.as_i64x8())) }
1189}
1190
1191/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1192///
1193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1194#[inline]
1195#[target_feature(enable = "avx512f")]
1196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1197#[cfg_attr(test, assert_instr(vpsubq))]
1198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1199pub const fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1200 unsafe {
1201 let sub: Simd = _mm512_sub_epi64(a, b).as_i64x8();
1202 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x8()))
1203 }
1204}
1205
1206/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1212#[cfg_attr(test, assert_instr(vpsubq))]
1213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1214pub const fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1215 unsafe {
1216 let sub: Simd = _mm512_sub_epi64(a, b).as_i64x8();
1217 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i64x8::ZERO))
1218 }
1219}
1220
1221/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1222///
1223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1224#[inline]
1225#[target_feature(enable = "avx512f,avx512vl")]
1226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1227#[cfg_attr(test, assert_instr(vpsubq))]
1228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1229pub const fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1230 unsafe {
1231 let sub: Simd = _mm256_sub_epi64(a, b).as_i64x4();
1232 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x4()))
1233 }
1234}
1235
1236/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1237///
1238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1239#[inline]
1240#[target_feature(enable = "avx512f,avx512vl")]
1241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1242#[cfg_attr(test, assert_instr(vpsubq))]
1243#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1244pub const fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1245 unsafe {
1246 let sub: Simd = _mm256_sub_epi64(a, b).as_i64x4();
1247 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i64x4::ZERO))
1248 }
1249}
1250
1251/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1252///
1253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1254#[inline]
1255#[target_feature(enable = "avx512f,avx512vl")]
1256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1257#[cfg_attr(test, assert_instr(vpsubq))]
1258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1259pub const fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1260 unsafe {
1261 let sub: Simd = _mm_sub_epi64(a, b).as_i64x2();
1262 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x2()))
1263 }
1264}
1265
1266/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1267///
1268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1269#[inline]
1270#[target_feature(enable = "avx512f,avx512vl")]
1271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1272#[cfg_attr(test, assert_instr(vpsubq))]
1273#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1274pub const fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1275 unsafe {
1276 let sub: Simd = _mm_sub_epi64(a, b).as_i64x2();
1277 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i64x2::ZERO))
1278 }
1279}
1280
1281/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1284#[inline]
1285#[target_feature(enable = "avx512f")]
1286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1287#[cfg_attr(test, assert_instr(vsubps))]
1288#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1289pub const fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1290 unsafe { transmute(src:simd_sub(lhs:a.as_f32x16(), rhs:b.as_f32x16())) }
1291}
1292
1293/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1294///
1295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1296#[inline]
1297#[target_feature(enable = "avx512f")]
1298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1299#[cfg_attr(test, assert_instr(vsubps))]
1300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1301pub const fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1302 unsafe {
1303 let sub: Simd = _mm512_sub_ps(a, b).as_f32x16();
1304 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x16()))
1305 }
1306}
1307
1308/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1309///
1310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1311#[inline]
1312#[target_feature(enable = "avx512f")]
1313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1314#[cfg_attr(test, assert_instr(vsubps))]
1315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1316pub const fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1317 unsafe {
1318 let sub: Simd = _mm512_sub_ps(a, b).as_f32x16();
1319 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x16::ZERO))
1320 }
1321}
1322
1323/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1324///
1325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1326#[inline]
1327#[target_feature(enable = "avx512f,avx512vl")]
1328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1329#[cfg_attr(test, assert_instr(vsubps))]
1330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1331pub const fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1332 unsafe {
1333 let sub: Simd = _mm256_sub_ps(a, b).as_f32x8();
1334 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x8()))
1335 }
1336}
1337
1338/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1339///
1340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1341#[inline]
1342#[target_feature(enable = "avx512f,avx512vl")]
1343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1344#[cfg_attr(test, assert_instr(vsubps))]
1345#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1346pub const fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1347 unsafe {
1348 let sub: Simd = _mm256_sub_ps(a, b).as_f32x8();
1349 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x8::ZERO))
1350 }
1351}
1352
1353/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1354///
1355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1356#[inline]
1357#[target_feature(enable = "avx512f,avx512vl")]
1358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1359#[cfg_attr(test, assert_instr(vsubps))]
1360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1361pub const fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1362 unsafe {
1363 let sub: Simd = _mm_sub_ps(a, b).as_f32x4();
1364 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x4()))
1365 }
1366}
1367
1368/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1369///
1370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1371#[inline]
1372#[target_feature(enable = "avx512f,avx512vl")]
1373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1374#[cfg_attr(test, assert_instr(vsubps))]
1375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1376pub const fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1377 unsafe {
1378 let sub: Simd = _mm_sub_ps(a, b).as_f32x4();
1379 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x4::ZERO))
1380 }
1381}
1382
1383/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1384///
1385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1386#[inline]
1387#[target_feature(enable = "avx512f")]
1388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1389#[cfg_attr(test, assert_instr(vsubpd))]
1390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1391pub const fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1392 unsafe { transmute(src:simd_sub(lhs:a.as_f64x8(), rhs:b.as_f64x8())) }
1393}
1394
1395/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1396///
1397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1398#[inline]
1399#[target_feature(enable = "avx512f")]
1400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1401#[cfg_attr(test, assert_instr(vsubpd))]
1402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1403pub const fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1404 unsafe {
1405 let sub: Simd = _mm512_sub_pd(a, b).as_f64x8();
1406 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x8()))
1407 }
1408}
1409
1410/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1411///
1412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1413#[inline]
1414#[target_feature(enable = "avx512f")]
1415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1416#[cfg_attr(test, assert_instr(vsubpd))]
1417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1418pub const fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1419 unsafe {
1420 let sub: Simd = _mm512_sub_pd(a, b).as_f64x8();
1421 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x8::ZERO))
1422 }
1423}
1424
1425/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1426///
1427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1428#[inline]
1429#[target_feature(enable = "avx512f,avx512vl")]
1430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1431#[cfg_attr(test, assert_instr(vsubpd))]
1432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1433pub const fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1434 unsafe {
1435 let sub: Simd = _mm256_sub_pd(a, b).as_f64x4();
1436 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x4()))
1437 }
1438}
1439
1440/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1441///
1442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1443#[inline]
1444#[target_feature(enable = "avx512f,avx512vl")]
1445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1446#[cfg_attr(test, assert_instr(vsubpd))]
1447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1448pub const fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1449 unsafe {
1450 let sub: Simd = _mm256_sub_pd(a, b).as_f64x4();
1451 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x4::ZERO))
1452 }
1453}
1454
1455/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1456///
1457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1458#[inline]
1459#[target_feature(enable = "avx512f,avx512vl")]
1460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1461#[cfg_attr(test, assert_instr(vsubpd))]
1462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1463pub const fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1464 unsafe {
1465 let sub: Simd = _mm_sub_pd(a, b).as_f64x2();
1466 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x2()))
1467 }
1468}
1469
1470/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1476#[cfg_attr(test, assert_instr(vsubpd))]
1477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1478pub const fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1479 unsafe {
1480 let sub: Simd = _mm_sub_pd(a, b).as_f64x2();
1481 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x2::ZERO))
1482 }
1483}
1484
1485/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1486///
1487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1488#[inline]
1489#[target_feature(enable = "avx512f")]
1490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1491#[cfg_attr(test, assert_instr(vpmuldq))]
1492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1493pub const fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1494 unsafe {
1495 let a: Simd = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1496 let b: Simd = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1497 transmute(src:simd_mul(x:a, y:b))
1498 }
1499}
1500
1501/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1502///
1503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1504#[inline]
1505#[target_feature(enable = "avx512f")]
1506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1507#[cfg_attr(test, assert_instr(vpmuldq))]
1508#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1509pub const fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1510 unsafe {
1511 let mul: Simd = _mm512_mul_epi32(a, b).as_i64x8();
1512 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x8()))
1513 }
1514}
1515
1516/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1517///
1518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1519#[inline]
1520#[target_feature(enable = "avx512f")]
1521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1522#[cfg_attr(test, assert_instr(vpmuldq))]
1523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1524pub const fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1525 unsafe {
1526 let mul: Simd = _mm512_mul_epi32(a, b).as_i64x8();
1527 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i64x8::ZERO))
1528 }
1529}
1530
1531/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1532///
1533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1534#[inline]
1535#[target_feature(enable = "avx512f,avx512vl")]
1536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1537#[cfg_attr(test, assert_instr(vpmuldq))]
1538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1539pub const fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1540 unsafe {
1541 let mul: Simd = _mm256_mul_epi32(a, b).as_i64x4();
1542 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x4()))
1543 }
1544}
1545
1546/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1547///
1548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1549#[inline]
1550#[target_feature(enable = "avx512f,avx512vl")]
1551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1552#[cfg_attr(test, assert_instr(vpmuldq))]
1553#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1554pub const fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1555 unsafe {
1556 let mul: Simd = _mm256_mul_epi32(a, b).as_i64x4();
1557 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i64x4::ZERO))
1558 }
1559}
1560
1561/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1562///
1563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1564#[inline]
1565#[target_feature(enable = "avx512f,avx512vl")]
1566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1567#[cfg_attr(test, assert_instr(vpmuldq))]
1568#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1569pub const fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1570 unsafe {
1571 let mul: Simd = _mm_mul_epi32(a, b).as_i64x2();
1572 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x2()))
1573 }
1574}
1575
1576/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1577///
1578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1579#[inline]
1580#[target_feature(enable = "avx512f,avx512vl")]
1581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1582#[cfg_attr(test, assert_instr(vpmuldq))]
1583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1584pub const fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1585 unsafe {
1586 let mul: Simd = _mm_mul_epi32(a, b).as_i64x2();
1587 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i64x2::ZERO))
1588 }
1589}
1590
1591/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1594#[inline]
1595#[target_feature(enable = "avx512f")]
1596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1597#[cfg_attr(test, assert_instr(vpmulld))]
1598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1599pub const fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1600 unsafe { transmute(src:simd_mul(x:a.as_i32x16(), y:b.as_i32x16())) }
1601}
1602
1603/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1604///
1605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1606#[inline]
1607#[target_feature(enable = "avx512f")]
1608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1609#[cfg_attr(test, assert_instr(vpmulld))]
1610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1611pub const fn _mm512_mask_mullo_epi32(
1612 src: __m512i,
1613 k: __mmask16,
1614 a: __m512i,
1615 b: __m512i,
1616) -> __m512i {
1617 unsafe {
1618 let mul: Simd = _mm512_mullo_epi32(a, b).as_i32x16();
1619 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i32x16()))
1620 }
1621}
1622
1623/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1624///
1625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1626#[inline]
1627#[target_feature(enable = "avx512f")]
1628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1629#[cfg_attr(test, assert_instr(vpmulld))]
1630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1631pub const fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1632 unsafe {
1633 let mul: Simd = _mm512_mullo_epi32(a, b).as_i32x16();
1634 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i32x16::ZERO))
1635 }
1636}
1637
1638/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1639///
1640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1641#[inline]
1642#[target_feature(enable = "avx512f,avx512vl")]
1643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1644#[cfg_attr(test, assert_instr(vpmulld))]
1645#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1646pub const fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1647 unsafe {
1648 let mul: Simd = _mm256_mullo_epi32(a, b).as_i32x8();
1649 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i32x8()))
1650 }
1651}
1652
1653/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1654///
1655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1656#[inline]
1657#[target_feature(enable = "avx512f,avx512vl")]
1658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1659#[cfg_attr(test, assert_instr(vpmulld))]
1660#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1661pub const fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1662 unsafe {
1663 let mul: Simd = _mm256_mullo_epi32(a, b).as_i32x8();
1664 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i32x8::ZERO))
1665 }
1666}
1667
1668/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1671#[inline]
1672#[target_feature(enable = "avx512f,avx512vl")]
1673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1674#[cfg_attr(test, assert_instr(vpmulld))]
1675#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1676pub const fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1677 unsafe {
1678 let mul: Simd = _mm_mullo_epi32(a, b).as_i32x4();
1679 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i32x4()))
1680 }
1681}
1682
1683/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1684///
1685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1686#[inline]
1687#[target_feature(enable = "avx512f,avx512vl")]
1688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1689#[cfg_attr(test, assert_instr(vpmulld))]
1690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1691pub const fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1692 unsafe {
1693 let mul: Simd = _mm_mullo_epi32(a, b).as_i32x4();
1694 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i32x4::ZERO))
1695 }
1696}
1697
1698/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1699///
1700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1701///
1702/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1703#[inline]
1704#[target_feature(enable = "avx512f")]
1705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1706#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1707pub const fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1708 unsafe { transmute(src:simd_mul(x:a.as_i64x8(), y:b.as_i64x8())) }
1709}
1710
1711/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1712///
1713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1714///
1715/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1716#[inline]
1717#[target_feature(enable = "avx512f")]
1718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1719#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1720pub const fn _mm512_mask_mullox_epi64(
1721 src: __m512i,
1722 k: __mmask8,
1723 a: __m512i,
1724 b: __m512i,
1725) -> __m512i {
1726 unsafe {
1727 let mul: Simd = _mm512_mullox_epi64(a, b).as_i64x8();
1728 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x8()))
1729 }
1730}
1731
1732/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1733///
1734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1735#[inline]
1736#[target_feature(enable = "avx512f")]
1737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1738#[cfg_attr(test, assert_instr(vpmuludq))]
1739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1740pub const fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1741 unsafe {
1742 let a: Simd = a.as_u64x8();
1743 let b: Simd = b.as_u64x8();
1744 let mask: Simd = u64x8::splat(u32::MAX as u64);
1745 transmute(src:simd_mul(x:simd_and(a, mask), y:simd_and(x:b, y:mask)))
1746 }
1747}
1748
1749/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1750///
1751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1752#[inline]
1753#[target_feature(enable = "avx512f")]
1754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1755#[cfg_attr(test, assert_instr(vpmuludq))]
1756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1757pub const fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1758 unsafe {
1759 let mul: Simd = _mm512_mul_epu32(a, b).as_u64x8();
1760 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_u64x8()))
1761 }
1762}
1763
1764/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1765///
1766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1767#[inline]
1768#[target_feature(enable = "avx512f")]
1769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1770#[cfg_attr(test, assert_instr(vpmuludq))]
1771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1772pub const fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1773 unsafe {
1774 let mul: Simd = _mm512_mul_epu32(a, b).as_u64x8();
1775 transmute(src:simd_select_bitmask(m:k, yes:mul, no:u64x8::ZERO))
1776 }
1777}
1778
1779/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1780///
1781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1782#[inline]
1783#[target_feature(enable = "avx512f,avx512vl")]
1784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1785#[cfg_attr(test, assert_instr(vpmuludq))]
1786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1787pub const fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1788 unsafe {
1789 let mul: Simd = _mm256_mul_epu32(a, b).as_u64x4();
1790 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_u64x4()))
1791 }
1792}
1793
1794/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1795///
1796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1797#[inline]
1798#[target_feature(enable = "avx512f,avx512vl")]
1799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1800#[cfg_attr(test, assert_instr(vpmuludq))]
1801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1802pub const fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1803 unsafe {
1804 let mul: Simd = _mm256_mul_epu32(a, b).as_u64x4();
1805 transmute(src:simd_select_bitmask(m:k, yes:mul, no:u64x4::ZERO))
1806 }
1807}
1808
1809/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1810///
1811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1812#[inline]
1813#[target_feature(enable = "avx512f,avx512vl")]
1814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1815#[cfg_attr(test, assert_instr(vpmuludq))]
1816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1817pub const fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1818 unsafe {
1819 let mul: Simd = _mm_mul_epu32(a, b).as_u64x2();
1820 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_u64x2()))
1821 }
1822}
1823
1824/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1825///
1826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1827#[inline]
1828#[target_feature(enable = "avx512f,avx512vl")]
1829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1830#[cfg_attr(test, assert_instr(vpmuludq))]
1831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1832pub const fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1833 unsafe {
1834 let mul: Simd = _mm_mul_epu32(a, b).as_u64x2();
1835 transmute(src:simd_select_bitmask(m:k, yes:mul, no:u64x2::ZERO))
1836 }
1837}
1838
1839/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1840///
1841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1842#[inline]
1843#[target_feature(enable = "avx512f")]
1844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1845#[cfg_attr(test, assert_instr(vmulps))]
1846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1847pub const fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1848 unsafe { transmute(src:simd_mul(x:a.as_f32x16(), y:b.as_f32x16())) }
1849}
1850
1851/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1852///
1853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1854#[inline]
1855#[target_feature(enable = "avx512f")]
1856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1857#[cfg_attr(test, assert_instr(vmulps))]
1858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1859pub const fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1860 unsafe {
1861 let mul: Simd = _mm512_mul_ps(a, b).as_f32x16();
1862 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f32x16()))
1863 }
1864}
1865
1866/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1867///
1868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1869#[inline]
1870#[target_feature(enable = "avx512f")]
1871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1872#[cfg_attr(test, assert_instr(vmulps))]
1873#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1874pub const fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1875 unsafe {
1876 let mul: Simd = _mm512_mul_ps(a, b).as_f32x16();
1877 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f32x16::ZERO))
1878 }
1879}
1880
1881/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1882///
1883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1884#[inline]
1885#[target_feature(enable = "avx512f,avx512vl")]
1886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1887#[cfg_attr(test, assert_instr(vmulps))]
1888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1889pub const fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1890 unsafe {
1891 let mul: Simd = _mm256_mul_ps(a, b).as_f32x8();
1892 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f32x8()))
1893 }
1894}
1895
1896/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1899#[inline]
1900#[target_feature(enable = "avx512f,avx512vl")]
1901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1902#[cfg_attr(test, assert_instr(vmulps))]
1903#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1904pub const fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1905 unsafe {
1906 let mul: Simd = _mm256_mul_ps(a, b).as_f32x8();
1907 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f32x8::ZERO))
1908 }
1909}
1910
1911/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1912///
1913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1914#[inline]
1915#[target_feature(enable = "avx512f,avx512vl")]
1916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1917#[cfg_attr(test, assert_instr(vmulps))]
1918#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1919pub const fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1920 unsafe {
1921 let mul: Simd = _mm_mul_ps(a, b).as_f32x4();
1922 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f32x4()))
1923 }
1924}
1925
1926/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1927///
1928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1929#[inline]
1930#[target_feature(enable = "avx512f,avx512vl")]
1931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1932#[cfg_attr(test, assert_instr(vmulps))]
1933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1934pub const fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1935 unsafe {
1936 let mul: Simd = _mm_mul_ps(a, b).as_f32x4();
1937 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f32x4::ZERO))
1938 }
1939}
1940
1941/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1942///
1943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1944#[inline]
1945#[target_feature(enable = "avx512f")]
1946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1947#[cfg_attr(test, assert_instr(vmulpd))]
1948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1949pub const fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1950 unsafe { transmute(src:simd_mul(x:a.as_f64x8(), y:b.as_f64x8())) }
1951}
1952
1953/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1954///
1955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1956#[inline]
1957#[target_feature(enable = "avx512f")]
1958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1959#[cfg_attr(test, assert_instr(vmulpd))]
1960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1961pub const fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1962 unsafe {
1963 let mul: Simd = _mm512_mul_pd(a, b).as_f64x8();
1964 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f64x8()))
1965 }
1966}
1967
1968/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1969///
1970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1971#[inline]
1972#[target_feature(enable = "avx512f")]
1973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1974#[cfg_attr(test, assert_instr(vmulpd))]
1975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1976pub const fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1977 unsafe {
1978 let mul: Simd = _mm512_mul_pd(a, b).as_f64x8();
1979 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f64x8::ZERO))
1980 }
1981}
1982
1983/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1984///
1985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1986#[inline]
1987#[target_feature(enable = "avx512f,avx512vl")]
1988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1989#[cfg_attr(test, assert_instr(vmulpd))]
1990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1991pub const fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1992 unsafe {
1993 let mul: Simd = _mm256_mul_pd(a, b).as_f64x4();
1994 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f64x4()))
1995 }
1996}
1997
1998/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1999///
2000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
2001#[inline]
2002#[target_feature(enable = "avx512f,avx512vl")]
2003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2004#[cfg_attr(test, assert_instr(vmulpd))]
2005#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2006pub const fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2007 unsafe {
2008 let mul: Simd = _mm256_mul_pd(a, b).as_f64x4();
2009 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f64x4::ZERO))
2010 }
2011}
2012
2013/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2014///
2015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
2016#[inline]
2017#[target_feature(enable = "avx512f,avx512vl")]
2018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2019#[cfg_attr(test, assert_instr(vmulpd))]
2020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2021pub const fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2022 unsafe {
2023 let mul: Simd = _mm_mul_pd(a, b).as_f64x2();
2024 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f64x2()))
2025 }
2026}
2027
2028/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2029///
2030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
2031#[inline]
2032#[target_feature(enable = "avx512f,avx512vl")]
2033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2034#[cfg_attr(test, assert_instr(vmulpd))]
2035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2036pub const fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2037 unsafe {
2038 let mul: Simd = _mm_mul_pd(a, b).as_f64x2();
2039 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f64x2::ZERO))
2040 }
2041}
2042
2043/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2044///
2045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
2046#[inline]
2047#[target_feature(enable = "avx512f")]
2048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2049#[cfg_attr(test, assert_instr(vdivps))]
2050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2051pub const fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
2052 unsafe { transmute(src:simd_div(lhs:a.as_f32x16(), rhs:b.as_f32x16())) }
2053}
2054
2055/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2056///
2057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
2058#[inline]
2059#[target_feature(enable = "avx512f")]
2060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2061#[cfg_attr(test, assert_instr(vdivps))]
2062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2063pub const fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2064 unsafe {
2065 let div: Simd = _mm512_div_ps(a, b).as_f32x16();
2066 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f32x16()))
2067 }
2068}
2069
2070/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2071///
2072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
2073#[inline]
2074#[target_feature(enable = "avx512f")]
2075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2076#[cfg_attr(test, assert_instr(vdivps))]
2077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2078pub const fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2079 unsafe {
2080 let div: Simd = _mm512_div_ps(a, b).as_f32x16();
2081 transmute(src:simd_select_bitmask(m:k, yes:div, no:f32x16::ZERO))
2082 }
2083}
2084
2085/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2086///
2087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
2088#[inline]
2089#[target_feature(enable = "avx512f,avx512vl")]
2090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2091#[cfg_attr(test, assert_instr(vdivps))]
2092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2093pub const fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2094 unsafe {
2095 let div: Simd = _mm256_div_ps(a, b).as_f32x8();
2096 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f32x8()))
2097 }
2098}
2099
2100/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2101///
2102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
2103#[inline]
2104#[target_feature(enable = "avx512f,avx512vl")]
2105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2106#[cfg_attr(test, assert_instr(vdivps))]
2107#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2108pub const fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2109 unsafe {
2110 let div: Simd = _mm256_div_ps(a, b).as_f32x8();
2111 transmute(src:simd_select_bitmask(m:k, yes:div, no:f32x8::ZERO))
2112 }
2113}
2114
2115/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
2118#[inline]
2119#[target_feature(enable = "avx512f,avx512vl")]
2120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2121#[cfg_attr(test, assert_instr(vdivps))]
2122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2123pub const fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2124 unsafe {
2125 let div: Simd = _mm_div_ps(a, b).as_f32x4();
2126 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f32x4()))
2127 }
2128}
2129
2130/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2131///
2132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
2133#[inline]
2134#[target_feature(enable = "avx512f,avx512vl")]
2135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2136#[cfg_attr(test, assert_instr(vdivps))]
2137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2138pub const fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2139 unsafe {
2140 let div: Simd = _mm_div_ps(a, b).as_f32x4();
2141 transmute(src:simd_select_bitmask(m:k, yes:div, no:f32x4::ZERO))
2142 }
2143}
2144
2145/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2146///
2147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
2148#[inline]
2149#[target_feature(enable = "avx512f")]
2150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2151#[cfg_attr(test, assert_instr(vdivpd))]
2152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2153pub const fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
2154 unsafe { transmute(src:simd_div(lhs:a.as_f64x8(), rhs:b.as_f64x8())) }
2155}
2156
2157/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2160#[inline]
2161#[target_feature(enable = "avx512f")]
2162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2163#[cfg_attr(test, assert_instr(vdivpd))]
2164#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2165pub const fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2166 unsafe {
2167 let div: Simd = _mm512_div_pd(a, b).as_f64x8();
2168 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f64x8()))
2169 }
2170}
2171
2172/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2173///
2174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2175#[inline]
2176#[target_feature(enable = "avx512f")]
2177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2178#[cfg_attr(test, assert_instr(vdivpd))]
2179#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2180pub const fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2181 unsafe {
2182 let div: Simd = _mm512_div_pd(a, b).as_f64x8();
2183 transmute(src:simd_select_bitmask(m:k, yes:div, no:f64x8::ZERO))
2184 }
2185}
2186
2187/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2188///
2189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2190#[inline]
2191#[target_feature(enable = "avx512f,avx512vl")]
2192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2193#[cfg_attr(test, assert_instr(vdivpd))]
2194#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2195pub const fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2196 unsafe {
2197 let div: Simd = _mm256_div_pd(a, b).as_f64x4();
2198 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f64x4()))
2199 }
2200}
2201
2202/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2203///
2204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2205#[inline]
2206#[target_feature(enable = "avx512f,avx512vl")]
2207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2208#[cfg_attr(test, assert_instr(vdivpd))]
2209#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2210pub const fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2211 unsafe {
2212 let div: Simd = _mm256_div_pd(a, b).as_f64x4();
2213 transmute(src:simd_select_bitmask(m:k, yes:div, no:f64x4::ZERO))
2214 }
2215}
2216
2217/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2218///
2219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2220#[inline]
2221#[target_feature(enable = "avx512f,avx512vl")]
2222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2223#[cfg_attr(test, assert_instr(vdivpd))]
2224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2225pub const fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2226 unsafe {
2227 let div: Simd = _mm_div_pd(a, b).as_f64x2();
2228 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f64x2()))
2229 }
2230}
2231
2232/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2233///
2234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2235#[inline]
2236#[target_feature(enable = "avx512f,avx512vl")]
2237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2238#[cfg_attr(test, assert_instr(vdivpd))]
2239#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2240pub const fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2241 unsafe {
2242 let div: Simd = _mm_div_pd(a, b).as_f64x2();
2243 transmute(src:simd_select_bitmask(m:k, yes:div, no:f64x2::ZERO))
2244 }
2245}
2246
2247/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2248///
2249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
2250#[inline]
2251#[target_feature(enable = "avx512f")]
2252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2253#[cfg_attr(test, assert_instr(vpmaxsd))]
2254#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2255pub const fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2256 unsafe { simd_imax(a.as_i32x16(), b.as_i32x16()).as_m512i() }
2257}
2258
2259/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2260///
2261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2262#[inline]
2263#[target_feature(enable = "avx512f")]
2264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2265#[cfg_attr(test, assert_instr(vpmaxsd))]
2266#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2267pub const fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2268 unsafe {
2269 let max: Simd = _mm512_max_epi32(a, b).as_i32x16();
2270 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x16()))
2271 }
2272}
2273
2274/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2275///
2276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2277#[inline]
2278#[target_feature(enable = "avx512f")]
2279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2280#[cfg_attr(test, assert_instr(vpmaxsd))]
2281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2282pub const fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2283 unsafe {
2284 let max: Simd = _mm512_max_epi32(a, b).as_i32x16();
2285 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x16::ZERO))
2286 }
2287}
2288
2289/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2290///
2291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2292#[inline]
2293#[target_feature(enable = "avx512f,avx512vl")]
2294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2295#[cfg_attr(test, assert_instr(vpmaxsd))]
2296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2297pub const fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2298 unsafe {
2299 let max: Simd = _mm256_max_epi32(a, b).as_i32x8();
2300 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x8()))
2301 }
2302}
2303
2304/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2305///
2306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2307#[inline]
2308#[target_feature(enable = "avx512f,avx512vl")]
2309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2310#[cfg_attr(test, assert_instr(vpmaxsd))]
2311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2312pub const fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2313 unsafe {
2314 let max: Simd = _mm256_max_epi32(a, b).as_i32x8();
2315 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x8::ZERO))
2316 }
2317}
2318
2319/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2320///
2321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2322#[inline]
2323#[target_feature(enable = "avx512f,avx512vl")]
2324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2325#[cfg_attr(test, assert_instr(vpmaxsd))]
2326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2327pub const fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2328 unsafe {
2329 let max: Simd = _mm_max_epi32(a, b).as_i32x4();
2330 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x4()))
2331 }
2332}
2333
2334/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2335///
2336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2337#[inline]
2338#[target_feature(enable = "avx512f,avx512vl")]
2339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2340#[cfg_attr(test, assert_instr(vpmaxsd))]
2341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2342pub const fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2343 unsafe {
2344 let max: Simd = _mm_max_epi32(a, b).as_i32x4();
2345 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x4::ZERO))
2346 }
2347}
2348
2349/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2350///
2351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2352#[inline]
2353#[target_feature(enable = "avx512f")]
2354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2355#[cfg_attr(test, assert_instr(vpmaxsq))]
2356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2357pub const fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2358 unsafe { simd_imax(a.as_i64x8(), b.as_i64x8()).as_m512i() }
2359}
2360
2361/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2362///
2363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2364#[inline]
2365#[target_feature(enable = "avx512f")]
2366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2367#[cfg_attr(test, assert_instr(vpmaxsq))]
2368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2369pub const fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2370 unsafe {
2371 let max: Simd = _mm512_max_epi64(a, b).as_i64x8();
2372 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x8()))
2373 }
2374}
2375
2376/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2377///
2378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2379#[inline]
2380#[target_feature(enable = "avx512f")]
2381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2382#[cfg_attr(test, assert_instr(vpmaxsq))]
2383#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2384pub const fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2385 unsafe {
2386 let max: Simd = _mm512_max_epi64(a, b).as_i64x8();
2387 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x8::ZERO))
2388 }
2389}
2390
2391/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2392///
2393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2394#[inline]
2395#[target_feature(enable = "avx512f,avx512vl")]
2396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2397#[cfg_attr(test, assert_instr(vpmaxsq))]
2398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2399pub const fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2400 unsafe { simd_imax(a.as_i64x4(), b.as_i64x4()).as_m256i() }
2401}
2402
2403/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2404///
2405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2406#[inline]
2407#[target_feature(enable = "avx512f,avx512vl")]
2408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2409#[cfg_attr(test, assert_instr(vpmaxsq))]
2410#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2411pub const fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2412 unsafe {
2413 let max: Simd = _mm256_max_epi64(a, b).as_i64x4();
2414 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x4()))
2415 }
2416}
2417
2418/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2419///
2420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2421#[inline]
2422#[target_feature(enable = "avx512f,avx512vl")]
2423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2424#[cfg_attr(test, assert_instr(vpmaxsq))]
2425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2426pub const fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2427 unsafe {
2428 let max: Simd = _mm256_max_epi64(a, b).as_i64x4();
2429 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x4::ZERO))
2430 }
2431}
2432
2433/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2434///
2435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2436#[inline]
2437#[target_feature(enable = "avx512f,avx512vl")]
2438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2439#[cfg_attr(test, assert_instr(vpmaxsq))]
2440#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2441pub const fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2442 unsafe { simd_imax(a.as_i64x2(), b.as_i64x2()).as_m128i() }
2443}
2444
2445/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2446///
2447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2448#[inline]
2449#[target_feature(enable = "avx512f,avx512vl")]
2450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2451#[cfg_attr(test, assert_instr(vpmaxsq))]
2452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2453pub const fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2454 unsafe {
2455 let max: Simd = _mm_max_epi64(a, b).as_i64x2();
2456 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x2()))
2457 }
2458}
2459
2460/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2461///
2462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2463#[inline]
2464#[target_feature(enable = "avx512f,avx512vl")]
2465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2466#[cfg_attr(test, assert_instr(vpmaxsq))]
2467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2468pub const fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2469 unsafe {
2470 let max: Simd = _mm_max_epi64(a, b).as_i64x2();
2471 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x2::ZERO))
2472 }
2473}
2474
2475/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2476///
2477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2478#[inline]
2479#[target_feature(enable = "avx512f")]
2480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2481#[cfg_attr(test, assert_instr(vmaxps))]
2482pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2483 unsafe {
2484 transmute(src:vmaxps(
2485 a.as_f32x16(),
2486 b.as_f32x16(),
2487 _MM_FROUND_CUR_DIRECTION,
2488 ))
2489 }
2490}
2491
2492/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2493///
2494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2495#[inline]
2496#[target_feature(enable = "avx512f")]
2497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2498#[cfg_attr(test, assert_instr(vmaxps))]
2499pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2500 unsafe {
2501 let max: Simd = _mm512_max_ps(a, b).as_f32x16();
2502 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x16()))
2503 }
2504}
2505
2506/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2507///
2508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2509#[inline]
2510#[target_feature(enable = "avx512f")]
2511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2512#[cfg_attr(test, assert_instr(vmaxps))]
2513pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2514 unsafe {
2515 let max: Simd = _mm512_max_ps(a, b).as_f32x16();
2516 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x16::ZERO))
2517 }
2518}
2519
2520/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2521///
2522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2523#[inline]
2524#[target_feature(enable = "avx512f,avx512vl")]
2525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2526#[cfg_attr(test, assert_instr(vmaxps))]
2527pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2528 unsafe {
2529 let max: Simd = _mm256_max_ps(a, b).as_f32x8();
2530 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x8()))
2531 }
2532}
2533
2534/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2535///
2536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2537#[inline]
2538#[target_feature(enable = "avx512f,avx512vl")]
2539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2540#[cfg_attr(test, assert_instr(vmaxps))]
2541pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2542 unsafe {
2543 let max: Simd = _mm256_max_ps(a, b).as_f32x8();
2544 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x8::ZERO))
2545 }
2546}
2547
2548/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2549///
2550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2551#[inline]
2552#[target_feature(enable = "avx512f,avx512vl")]
2553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2554#[cfg_attr(test, assert_instr(vmaxps))]
2555pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2556 unsafe {
2557 let max: Simd = _mm_max_ps(a, b).as_f32x4();
2558 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x4()))
2559 }
2560}
2561
2562/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2563///
2564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2565#[inline]
2566#[target_feature(enable = "avx512f,avx512vl")]
2567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2568#[cfg_attr(test, assert_instr(vmaxps))]
2569pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2570 unsafe {
2571 let max: Simd = _mm_max_ps(a, b).as_f32x4();
2572 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x4::ZERO))
2573 }
2574}
2575
2576/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2577///
2578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2579#[inline]
2580#[target_feature(enable = "avx512f")]
2581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2582#[cfg_attr(test, assert_instr(vmaxpd))]
2583pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2584 unsafe { transmute(src:vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2585}
2586
2587/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2588///
2589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2590#[inline]
2591#[target_feature(enable = "avx512f")]
2592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2593#[cfg_attr(test, assert_instr(vmaxpd))]
2594pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2595 unsafe {
2596 let max: Simd = _mm512_max_pd(a, b).as_f64x8();
2597 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x8()))
2598 }
2599}
2600
2601/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2602///
2603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2604#[inline]
2605#[target_feature(enable = "avx512f")]
2606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2607#[cfg_attr(test, assert_instr(vmaxpd))]
2608pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2609 unsafe {
2610 let max: Simd = _mm512_max_pd(a, b).as_f64x8();
2611 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x8::ZERO))
2612 }
2613}
2614
2615/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2616///
2617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2618#[inline]
2619#[target_feature(enable = "avx512f,avx512vl")]
2620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2621#[cfg_attr(test, assert_instr(vmaxpd))]
2622pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2623 unsafe {
2624 let max: Simd = _mm256_max_pd(a, b).as_f64x4();
2625 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x4()))
2626 }
2627}
2628
2629/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2630///
2631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2632#[inline]
2633#[target_feature(enable = "avx512f,avx512vl")]
2634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2635#[cfg_attr(test, assert_instr(vmaxpd))]
2636pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2637 unsafe {
2638 let max: Simd = _mm256_max_pd(a, b).as_f64x4();
2639 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x4::ZERO))
2640 }
2641}
2642
2643/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2644///
2645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2646#[inline]
2647#[target_feature(enable = "avx512f,avx512vl")]
2648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2649#[cfg_attr(test, assert_instr(vmaxpd))]
2650pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2651 unsafe {
2652 let max: Simd = _mm_max_pd(a, b).as_f64x2();
2653 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x2()))
2654 }
2655}
2656
2657/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2658///
2659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2660#[inline]
2661#[target_feature(enable = "avx512f,avx512vl")]
2662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2663#[cfg_attr(test, assert_instr(vmaxpd))]
2664pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2665 unsafe {
2666 let max: Simd = _mm_max_pd(a, b).as_f64x2();
2667 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x2::ZERO))
2668 }
2669}
2670
2671/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2672///
2673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2674#[inline]
2675#[target_feature(enable = "avx512f")]
2676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2677#[cfg_attr(test, assert_instr(vpmaxud))]
2678#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2679pub const fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2680 unsafe { simd_imax(a.as_u32x16(), b.as_u32x16()).as_m512i() }
2681}
2682
2683/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2684///
2685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2686#[inline]
2687#[target_feature(enable = "avx512f")]
2688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2689#[cfg_attr(test, assert_instr(vpmaxud))]
2690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2691pub const fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2692 unsafe {
2693 let max: Simd = _mm512_max_epu32(a, b).as_u32x16();
2694 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x16()))
2695 }
2696}
2697
2698/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2699///
2700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2701#[inline]
2702#[target_feature(enable = "avx512f")]
2703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2704#[cfg_attr(test, assert_instr(vpmaxud))]
2705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2706pub const fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2707 unsafe {
2708 let max: Simd = _mm512_max_epu32(a, b).as_u32x16();
2709 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x16::ZERO))
2710 }
2711}
2712
2713/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2714///
2715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2716#[inline]
2717#[target_feature(enable = "avx512f,avx512vl")]
2718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2719#[cfg_attr(test, assert_instr(vpmaxud))]
2720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2721pub const fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2722 unsafe {
2723 let max: Simd = _mm256_max_epu32(a, b).as_u32x8();
2724 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x8()))
2725 }
2726}
2727
2728/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2729///
2730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2731#[inline]
2732#[target_feature(enable = "avx512f,avx512vl")]
2733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2734#[cfg_attr(test, assert_instr(vpmaxud))]
2735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2736pub const fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2737 unsafe {
2738 let max: Simd = _mm256_max_epu32(a, b).as_u32x8();
2739 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x8::ZERO))
2740 }
2741}
2742
2743/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2744///
2745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2746#[inline]
2747#[target_feature(enable = "avx512f,avx512vl")]
2748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2749#[cfg_attr(test, assert_instr(vpmaxud))]
2750#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2751pub const fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2752 unsafe {
2753 let max: Simd = _mm_max_epu32(a, b).as_u32x4();
2754 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x4()))
2755 }
2756}
2757
2758/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2759///
2760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2761#[inline]
2762#[target_feature(enable = "avx512f,avx512vl")]
2763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2764#[cfg_attr(test, assert_instr(vpmaxud))]
2765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2766pub const fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2767 unsafe {
2768 let max: Simd = _mm_max_epu32(a, b).as_u32x4();
2769 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x4::ZERO))
2770 }
2771}
2772
2773/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2774///
2775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2776#[inline]
2777#[target_feature(enable = "avx512f")]
2778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2779#[cfg_attr(test, assert_instr(vpmaxuq))]
2780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2781pub const fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2782 unsafe { simd_imax(a.as_u64x8(), b.as_u64x8()).as_m512i() }
2783}
2784
2785/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2786///
2787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2788#[inline]
2789#[target_feature(enable = "avx512f")]
2790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2791#[cfg_attr(test, assert_instr(vpmaxuq))]
2792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2793pub const fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2794 unsafe {
2795 let max: Simd = _mm512_max_epu64(a, b).as_u64x8();
2796 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x8()))
2797 }
2798}
2799
2800/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2801///
2802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2803#[inline]
2804#[target_feature(enable = "avx512f")]
2805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2806#[cfg_attr(test, assert_instr(vpmaxuq))]
2807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2808pub const fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2809 unsafe {
2810 let max: Simd = _mm512_max_epu64(a, b).as_u64x8();
2811 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x8::ZERO))
2812 }
2813}
2814
2815/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2816///
2817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2818#[inline]
2819#[target_feature(enable = "avx512f,avx512vl")]
2820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2821#[cfg_attr(test, assert_instr(vpmaxuq))]
2822#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2823pub const fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2824 unsafe { simd_imax(a.as_u64x4(), b.as_u64x4()).as_m256i() }
2825}
2826
2827/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2828///
2829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2830#[inline]
2831#[target_feature(enable = "avx512f,avx512vl")]
2832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2833#[cfg_attr(test, assert_instr(vpmaxuq))]
2834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2835pub const fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2836 unsafe {
2837 let max: Simd = _mm256_max_epu64(a, b).as_u64x4();
2838 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x4()))
2839 }
2840}
2841
2842/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2843///
2844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2845#[inline]
2846#[target_feature(enable = "avx512f,avx512vl")]
2847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2848#[cfg_attr(test, assert_instr(vpmaxuq))]
2849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2850pub const fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2851 unsafe {
2852 let max: Simd = _mm256_max_epu64(a, b).as_u64x4();
2853 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x4::ZERO))
2854 }
2855}
2856
2857/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2858///
2859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2860#[inline]
2861#[target_feature(enable = "avx512f,avx512vl")]
2862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2863#[cfg_attr(test, assert_instr(vpmaxuq))]
2864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2865pub const fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2866 unsafe { simd_imax(a.as_u64x2(), b.as_u64x2()).as_m128i() }
2867}
2868
2869/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2870///
2871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2872#[inline]
2873#[target_feature(enable = "avx512f,avx512vl")]
2874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2875#[cfg_attr(test, assert_instr(vpmaxuq))]
2876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2877pub const fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2878 unsafe {
2879 let max: Simd = _mm_max_epu64(a, b).as_u64x2();
2880 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x2()))
2881 }
2882}
2883
2884/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2885///
2886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2887#[inline]
2888#[target_feature(enable = "avx512f,avx512vl")]
2889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2890#[cfg_attr(test, assert_instr(vpmaxuq))]
2891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2892pub const fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2893 unsafe {
2894 let max: Simd = _mm_max_epu64(a, b).as_u64x2();
2895 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x2::ZERO))
2896 }
2897}
2898
2899/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2900///
2901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2902#[inline]
2903#[target_feature(enable = "avx512f")]
2904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2905#[cfg_attr(test, assert_instr(vpminsd))]
2906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2907pub const fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2908 unsafe { simd_imin(a.as_i32x16(), b.as_i32x16()).as_m512i() }
2909}
2910
2911/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2912///
2913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2914#[inline]
2915#[target_feature(enable = "avx512f")]
2916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2917#[cfg_attr(test, assert_instr(vpminsd))]
2918#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2919pub const fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2920 unsafe {
2921 let min: Simd = _mm512_min_epi32(a, b).as_i32x16();
2922 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x16()))
2923 }
2924}
2925
2926/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2927///
2928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2929#[inline]
2930#[target_feature(enable = "avx512f")]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932#[cfg_attr(test, assert_instr(vpminsd))]
2933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2934pub const fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2935 unsafe {
2936 let min: Simd = _mm512_min_epi32(a, b).as_i32x16();
2937 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x16::ZERO))
2938 }
2939}
2940
2941/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2942///
2943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2944#[inline]
2945#[target_feature(enable = "avx512f,avx512vl")]
2946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2947#[cfg_attr(test, assert_instr(vpminsd))]
2948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2949pub const fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2950 unsafe {
2951 let min: Simd = _mm256_min_epi32(a, b).as_i32x8();
2952 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x8()))
2953 }
2954}
2955
2956/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2957///
2958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2959#[inline]
2960#[target_feature(enable = "avx512f,avx512vl")]
2961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2962#[cfg_attr(test, assert_instr(vpminsd))]
2963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2964pub const fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2965 unsafe {
2966 let min: Simd = _mm256_min_epi32(a, b).as_i32x8();
2967 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x8::ZERO))
2968 }
2969}
2970
2971/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2972///
2973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2974#[inline]
2975#[target_feature(enable = "avx512f,avx512vl")]
2976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2977#[cfg_attr(test, assert_instr(vpminsd))]
2978#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2979pub const fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2980 unsafe {
2981 let min: Simd = _mm_min_epi32(a, b).as_i32x4();
2982 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x4()))
2983 }
2984}
2985
2986/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2987///
2988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2989#[inline]
2990#[target_feature(enable = "avx512f,avx512vl")]
2991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2992#[cfg_attr(test, assert_instr(vpminsd))]
2993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2994pub const fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2995 unsafe {
2996 let min: Simd = _mm_min_epi32(a, b).as_i32x4();
2997 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x4::ZERO))
2998 }
2999}
3000
3001/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3002///
3003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
3004#[inline]
3005#[target_feature(enable = "avx512f")]
3006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3007#[cfg_attr(test, assert_instr(vpminsq))]
3008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3009pub const fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
3010 unsafe { simd_imin(a.as_i64x8(), b.as_i64x8()).as_m512i() }
3011}
3012
3013/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3014///
3015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
3016#[inline]
3017#[target_feature(enable = "avx512f")]
3018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3019#[cfg_attr(test, assert_instr(vpminsq))]
3020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3021pub const fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3022 unsafe {
3023 let min: Simd = _mm512_min_epi64(a, b).as_i64x8();
3024 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x8()))
3025 }
3026}
3027
3028/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3029///
3030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
3031#[inline]
3032#[target_feature(enable = "avx512f")]
3033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3034#[cfg_attr(test, assert_instr(vpminsq))]
3035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3036pub const fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3037 unsafe {
3038 let min: Simd = _mm512_min_epi64(a, b).as_i64x8();
3039 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x8::ZERO))
3040 }
3041}
3042
3043/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3044///
3045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
3046#[inline]
3047#[target_feature(enable = "avx512f,avx512vl")]
3048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3049#[cfg_attr(test, assert_instr(vpminsq))]
3050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3051pub const fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
3052 unsafe { simd_imin(a.as_i64x4(), b.as_i64x4()).as_m256i() }
3053}
3054
3055/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3056///
3057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
3058#[inline]
3059#[target_feature(enable = "avx512f,avx512vl")]
3060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3061#[cfg_attr(test, assert_instr(vpminsq))]
3062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3063pub const fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3064 unsafe {
3065 let min: Simd = _mm256_min_epi64(a, b).as_i64x4();
3066 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x4()))
3067 }
3068}
3069
3070/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3071///
3072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
3073#[inline]
3074#[target_feature(enable = "avx512f,avx512vl")]
3075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3076#[cfg_attr(test, assert_instr(vpminsq))]
3077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3078pub const fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3079 unsafe {
3080 let min: Simd = _mm256_min_epi64(a, b).as_i64x4();
3081 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x4::ZERO))
3082 }
3083}
3084
3085/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3086///
3087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
3088#[inline]
3089#[target_feature(enable = "avx512f,avx512vl")]
3090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3091#[cfg_attr(test, assert_instr(vpminsq))]
3092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3093pub const fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
3094 unsafe { simd_imin(a.as_i64x2(), b.as_i64x2()).as_m128i() }
3095}
3096
3097/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3098///
3099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
3100#[inline]
3101#[target_feature(enable = "avx512f,avx512vl")]
3102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3103#[cfg_attr(test, assert_instr(vpminsq))]
3104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3105pub const fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3106 unsafe {
3107 let min: Simd = _mm_min_epi64(a, b).as_i64x2();
3108 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x2()))
3109 }
3110}
3111
3112/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3113///
3114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
3115#[inline]
3116#[target_feature(enable = "avx512f,avx512vl")]
3117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3118#[cfg_attr(test, assert_instr(vpminsq))]
3119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3120pub const fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3121 unsafe {
3122 let min: Simd = _mm_min_epi64(a, b).as_i64x2();
3123 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x2::ZERO))
3124 }
3125}
3126
3127/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
3128///
3129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
3130#[inline]
3131#[target_feature(enable = "avx512f")]
3132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3133#[cfg_attr(test, assert_instr(vminps))]
3134pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
3135 unsafe {
3136 transmute(src:vminps(
3137 a.as_f32x16(),
3138 b.as_f32x16(),
3139 _MM_FROUND_CUR_DIRECTION,
3140 ))
3141 }
3142}
3143
3144/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3145///
3146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
3147#[inline]
3148#[target_feature(enable = "avx512f")]
3149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3150#[cfg_attr(test, assert_instr(vminps))]
3151pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
3152 unsafe {
3153 let min: Simd = _mm512_min_ps(a, b).as_f32x16();
3154 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x16()))
3155 }
3156}
3157
3158/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3159///
3160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3161#[inline]
3162#[target_feature(enable = "avx512f")]
3163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3164#[cfg_attr(test, assert_instr(vminps))]
3165pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3166 unsafe {
3167 let min: Simd = _mm512_min_ps(a, b).as_f32x16();
3168 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x16::ZERO))
3169 }
3170}
3171
3172/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3173///
3174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3175#[inline]
3176#[target_feature(enable = "avx512f,avx512vl")]
3177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3178#[cfg_attr(test, assert_instr(vminps))]
3179pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3180 unsafe {
3181 let min: Simd = _mm256_min_ps(a, b).as_f32x8();
3182 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x8()))
3183 }
3184}
3185
3186/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3187///
3188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3189#[inline]
3190#[target_feature(enable = "avx512f,avx512vl")]
3191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3192#[cfg_attr(test, assert_instr(vminps))]
3193pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3194 unsafe {
3195 let min: Simd = _mm256_min_ps(a, b).as_f32x8();
3196 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x8::ZERO))
3197 }
3198}
3199
3200/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3201///
3202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3203#[inline]
3204#[target_feature(enable = "avx512f,avx512vl")]
3205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3206#[cfg_attr(test, assert_instr(vminps))]
3207pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3208 unsafe {
3209 let min: Simd = _mm_min_ps(a, b).as_f32x4();
3210 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x4()))
3211 }
3212}
3213
3214/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3215///
3216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3217#[inline]
3218#[target_feature(enable = "avx512f,avx512vl")]
3219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3220#[cfg_attr(test, assert_instr(vminps))]
3221pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3222 unsafe {
3223 let min: Simd = _mm_min_ps(a, b).as_f32x4();
3224 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x4::ZERO))
3225 }
3226}
3227
3228/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3229///
3230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3231#[inline]
3232#[target_feature(enable = "avx512f")]
3233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3234#[cfg_attr(test, assert_instr(vminpd))]
3235pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3236 unsafe { transmute(src:vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3237}
3238
3239/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3240///
3241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3242#[inline]
3243#[target_feature(enable = "avx512f")]
3244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3245#[cfg_attr(test, assert_instr(vminpd))]
3246pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3247 unsafe {
3248 let min: Simd = _mm512_min_pd(a, b).as_f64x8();
3249 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x8()))
3250 }
3251}
3252
3253/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3254///
3255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3256#[inline]
3257#[target_feature(enable = "avx512f")]
3258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3259#[cfg_attr(test, assert_instr(vminpd))]
3260pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3261 unsafe {
3262 let min: Simd = _mm512_min_pd(a, b).as_f64x8();
3263 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x8::ZERO))
3264 }
3265}
3266
3267/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3268///
3269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3270#[inline]
3271#[target_feature(enable = "avx512f,avx512vl")]
3272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3273#[cfg_attr(test, assert_instr(vminpd))]
3274pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3275 unsafe {
3276 let min: Simd = _mm256_min_pd(a, b).as_f64x4();
3277 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x4()))
3278 }
3279}
3280
3281/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3282///
3283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3284#[inline]
3285#[target_feature(enable = "avx512f,avx512vl")]
3286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3287#[cfg_attr(test, assert_instr(vminpd))]
3288pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3289 unsafe {
3290 let min: Simd = _mm256_min_pd(a, b).as_f64x4();
3291 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x4::ZERO))
3292 }
3293}
3294
3295/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3296///
3297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3298#[inline]
3299#[target_feature(enable = "avx512f,avx512vl")]
3300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3301#[cfg_attr(test, assert_instr(vminpd))]
3302pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3303 unsafe {
3304 let min: Simd = _mm_min_pd(a, b).as_f64x2();
3305 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x2()))
3306 }
3307}
3308
3309/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3310///
3311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3312#[inline]
3313#[target_feature(enable = "avx512f,avx512vl")]
3314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3315#[cfg_attr(test, assert_instr(vminpd))]
3316pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3317 unsafe {
3318 let min: Simd = _mm_min_pd(a, b).as_f64x2();
3319 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x2::ZERO))
3320 }
3321}
3322
3323/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3324///
3325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3326#[inline]
3327#[target_feature(enable = "avx512f")]
3328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3329#[cfg_attr(test, assert_instr(vpminud))]
3330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3331pub const fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3332 unsafe { simd_imin(a.as_u32x16(), b.as_u32x16()).as_m512i() }
3333}
3334
3335/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3336///
3337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3338#[inline]
3339#[target_feature(enable = "avx512f")]
3340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3341#[cfg_attr(test, assert_instr(vpminud))]
3342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3343pub const fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3344 unsafe {
3345 let min: Simd = _mm512_min_epu32(a, b).as_u32x16();
3346 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x16()))
3347 }
3348}
3349
3350/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3351///
3352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3353#[inline]
3354#[target_feature(enable = "avx512f")]
3355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3356#[cfg_attr(test, assert_instr(vpminud))]
3357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3358pub const fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3359 unsafe {
3360 let min: Simd = _mm512_min_epu32(a, b).as_u32x16();
3361 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x16::ZERO))
3362 }
3363}
3364
3365/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3366///
3367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3368#[inline]
3369#[target_feature(enable = "avx512f,avx512vl")]
3370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3371#[cfg_attr(test, assert_instr(vpminud))]
3372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3373pub const fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3374 unsafe {
3375 let min: Simd = _mm256_min_epu32(a, b).as_u32x8();
3376 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x8()))
3377 }
3378}
3379
3380/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3381///
3382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3383#[inline]
3384#[target_feature(enable = "avx512f,avx512vl")]
3385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3386#[cfg_attr(test, assert_instr(vpminud))]
3387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3388pub const fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3389 unsafe {
3390 let min: Simd = _mm256_min_epu32(a, b).as_u32x8();
3391 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x8::ZERO))
3392 }
3393}
3394
3395/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3396///
3397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3398#[inline]
3399#[target_feature(enable = "avx512f,avx512vl")]
3400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3401#[cfg_attr(test, assert_instr(vpminud))]
3402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3403pub const fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3404 unsafe {
3405 let min: Simd = _mm_min_epu32(a, b).as_u32x4();
3406 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x4()))
3407 }
3408}
3409
3410/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3411///
3412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3413#[inline]
3414#[target_feature(enable = "avx512f,avx512vl")]
3415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3416#[cfg_attr(test, assert_instr(vpminud))]
3417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3418pub const fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3419 unsafe {
3420 let min: Simd = _mm_min_epu32(a, b).as_u32x4();
3421 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x4::ZERO))
3422 }
3423}
3424
3425/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3426///
3427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3428#[inline]
3429#[target_feature(enable = "avx512f")]
3430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3431#[cfg_attr(test, assert_instr(vpminuq))]
3432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3433pub const fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3434 unsafe { simd_imin(a.as_u64x8(), b.as_u64x8()).as_m512i() }
3435}
3436
3437/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3438///
3439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3440#[inline]
3441#[target_feature(enable = "avx512f")]
3442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3443#[cfg_attr(test, assert_instr(vpminuq))]
3444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3445pub const fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3446 unsafe {
3447 let min: Simd = _mm512_min_epu64(a, b).as_u64x8();
3448 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u64x8()))
3449 }
3450}
3451
3452/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3453///
3454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3455#[inline]
3456#[target_feature(enable = "avx512f")]
3457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3458#[cfg_attr(test, assert_instr(vpminuq))]
3459#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3460pub const fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3461 unsafe {
3462 let min: Simd = _mm512_min_epu64(a, b).as_u64x8();
3463 transmute(src:simd_select_bitmask(m:k, yes:min, no:u64x8::ZERO))
3464 }
3465}
3466
3467/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3470#[inline]
3471#[target_feature(enable = "avx512f,avx512vl")]
3472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3473#[cfg_attr(test, assert_instr(vpminuq))]
3474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3475pub const fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3476 unsafe { simd_imin(a.as_u64x4(), b.as_u64x4()).as_m256i() }
3477}
3478
3479/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3480///
3481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3482#[inline]
3483#[target_feature(enable = "avx512f,avx512vl")]
3484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3485#[cfg_attr(test, assert_instr(vpminuq))]
3486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3487pub const fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3488 unsafe {
3489 let min: Simd = _mm256_min_epu64(a, b).as_u64x4();
3490 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u64x4()))
3491 }
3492}
3493
3494/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3495///
3496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3497#[inline]
3498#[target_feature(enable = "avx512f,avx512vl")]
3499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3500#[cfg_attr(test, assert_instr(vpminuq))]
3501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3502pub const fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3503 unsafe {
3504 let min: Simd = _mm256_min_epu64(a, b).as_u64x4();
3505 transmute(src:simd_select_bitmask(m:k, yes:min, no:u64x4::ZERO))
3506 }
3507}
3508
3509/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3510///
3511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3512#[inline]
3513#[target_feature(enable = "avx512f,avx512vl")]
3514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3515#[cfg_attr(test, assert_instr(vpminuq))]
3516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3517pub const fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3518 unsafe { simd_imin(a.as_u64x2(), b.as_u64x2()).as_m128i() }
3519}
3520
3521/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3522///
3523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3524#[inline]
3525#[target_feature(enable = "avx512f,avx512vl")]
3526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3527#[cfg_attr(test, assert_instr(vpminuq))]
3528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3529pub const fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3530 unsafe {
3531 let min: Simd = _mm_min_epu64(a, b).as_u64x2();
3532 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u64x2()))
3533 }
3534}
3535
3536/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3537///
3538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3539#[inline]
3540#[target_feature(enable = "avx512f,avx512vl")]
3541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3542#[cfg_attr(test, assert_instr(vpminuq))]
3543#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3544pub const fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3545 unsafe {
3546 let min: Simd = _mm_min_epu64(a, b).as_u64x2();
3547 transmute(src:simd_select_bitmask(m:k, yes:min, no:u64x2::ZERO))
3548 }
3549}
3550
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    // Element-wise square root via the generic SIMD intrinsic.
    unsafe { simd_fsqrt(a) }
}
3561
3562/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3563///
3564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3565#[inline]
3566#[target_feature(enable = "avx512f")]
3567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3568#[cfg_attr(test, assert_instr(vsqrtps))]
3569pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3570 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3571}
3572
3573/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3574///
3575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3576#[inline]
3577#[target_feature(enable = "avx512f")]
3578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3579#[cfg_attr(test, assert_instr(vsqrtps))]
3580pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3581 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm512_setzero_ps()) }
3582}
3583
3584/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3585///
3586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3587#[inline]
3588#[target_feature(enable = "avx512f,avx512vl")]
3589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3590#[cfg_attr(test, assert_instr(vsqrtps))]
3591pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3592 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3593}
3594
3595/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3596///
3597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3598#[inline]
3599#[target_feature(enable = "avx512f,avx512vl")]
3600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3601#[cfg_attr(test, assert_instr(vsqrtps))]
3602pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3603 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm256_setzero_ps()) }
3604}
3605
3606/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3607///
3608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3609#[inline]
3610#[target_feature(enable = "avx512f,avx512vl")]
3611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3612#[cfg_attr(test, assert_instr(vsqrtps))]
3613pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3614 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3615}
3616
3617/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3618///
3619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3620#[inline]
3621#[target_feature(enable = "avx512f,avx512vl")]
3622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3623#[cfg_attr(test, assert_instr(vsqrtps))]
3624pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3625 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm_setzero_ps()) }
3626}
3627
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    // Element-wise square root via the generic SIMD intrinsic.
    unsafe { simd_fsqrt(a) }
}
3638
3639/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3640///
3641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3642#[inline]
3643#[target_feature(enable = "avx512f")]
3644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3645#[cfg_attr(test, assert_instr(vsqrtpd))]
3646pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3647 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3648}
3649
3650/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3651///
3652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3653#[inline]
3654#[target_feature(enable = "avx512f")]
3655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3656#[cfg_attr(test, assert_instr(vsqrtpd))]
3657pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3658 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm512_setzero_pd()) }
3659}
3660
3661/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3662///
3663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3664#[inline]
3665#[target_feature(enable = "avx512f,avx512vl")]
3666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3667#[cfg_attr(test, assert_instr(vsqrtpd))]
3668pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3669 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3670}
3671
3672/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3673///
3674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3675#[inline]
3676#[target_feature(enable = "avx512f,avx512vl")]
3677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3678#[cfg_attr(test, assert_instr(vsqrtpd))]
3679pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3680 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm256_setzero_pd()) }
3681}
3682
3683/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3684///
3685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3686#[inline]
3687#[target_feature(enable = "avx512f,avx512vl")]
3688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3689#[cfg_attr(test, assert_instr(vsqrtpd))]
3690pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3691 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3692}
3693
3694/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3695///
3696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3697#[inline]
3698#[target_feature(enable = "avx512f,avx512vl")]
3699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3700#[cfg_attr(test, assert_instr(vsqrtpd))]
3701pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3702 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm_setzero_pd()) }
3703}
3704
3705/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3706///
3707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3708#[inline]
3709#[target_feature(enable = "avx512f")]
3710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3711#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3712#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3713pub const fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3714 unsafe { simd_fma(x:a, y:b, z:c) }
3715}
3716
3717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3718///
3719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3720#[inline]
3721#[target_feature(enable = "avx512f")]
3722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3723#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3724#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3725pub const fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3726 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ps(a, b, c), no:a) }
3727}
3728
3729/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3730///
3731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3732#[inline]
3733#[target_feature(enable = "avx512f")]
3734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3735#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3737pub const fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3738 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ps(a, b, c), no:_mm512_setzero_ps()) }
3739}
3740
3741/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3742///
3743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3744#[inline]
3745#[target_feature(enable = "avx512f")]
3746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3747#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3749pub const fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3750 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ps(a, b, c), no:c) }
3751}
3752
3753/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3760#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3761pub const fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3762 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ps(a, b, c), no:a) }
3763}
3764
3765/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3766///
3767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3768#[inline]
3769#[target_feature(enable = "avx512f,avx512vl")]
3770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3771#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3773pub const fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3774 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ps(a, b, c), no:_mm256_setzero_ps()) }
3775}
3776
3777/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3778///
3779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3780#[inline]
3781#[target_feature(enable = "avx512f,avx512vl")]
3782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3783#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3785pub const fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3786 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ps(a, b, c), no:c) }
3787}
3788
3789/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3790///
3791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3792#[inline]
3793#[target_feature(enable = "avx512f,avx512vl")]
3794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3795#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3797pub const fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3798 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ps(a, b, c), no:a) }
3799}
3800
3801/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3802///
3803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3804#[inline]
3805#[target_feature(enable = "avx512f,avx512vl")]
3806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3807#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3809pub const fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3810 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ps(a, b, c), no:_mm_setzero_ps()) }
3811}
3812
3813/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3814///
3815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3816#[inline]
3817#[target_feature(enable = "avx512f,avx512vl")]
3818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3819#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3821pub const fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3822 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ps(a, b, c), no:c) }
3823}
3824
3825/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3826///
3827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3828#[inline]
3829#[target_feature(enable = "avx512f")]
3830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3831#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3833pub const fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3834 unsafe { simd_fma(x:a, y:b, z:c) }
3835}
3836
3837/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3838///
3839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3840#[inline]
3841#[target_feature(enable = "avx512f")]
3842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3843#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3845pub const fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3846 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_pd(a, b, c), no:a) }
3847}
3848
3849/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3850///
3851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3852#[inline]
3853#[target_feature(enable = "avx512f")]
3854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3855#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3856#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3857pub const fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3858 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_pd(a, b, c), no:_mm512_setzero_pd()) }
3859}
3860
3861/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3862///
3863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3864#[inline]
3865#[target_feature(enable = "avx512f")]
3866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3867#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3869pub const fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3870 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_pd(a, b, c), no:c) }
3871}
3872
3873/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3874///
3875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3876#[inline]
3877#[target_feature(enable = "avx512f,avx512vl")]
3878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3879#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3881pub const fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3882 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_pd(a, b, c), no:a) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3888#[inline]
3889#[target_feature(enable = "avx512f,avx512vl")]
3890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3891#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3893pub const fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3894 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_pd(a, b, c), no:_mm256_setzero_pd()) }
3895}
3896
3897/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3898///
3899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3900#[inline]
3901#[target_feature(enable = "avx512f,avx512vl")]
3902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3903#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3905pub const fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3906 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_pd(a, b, c), no:c) }
3907}
3908
3909/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3910///
3911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3912#[inline]
3913#[target_feature(enable = "avx512f,avx512vl")]
3914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3915#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3917pub const fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3918 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_pd(a, b, c), no:a) }
3919}
3920
3921/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3922///
3923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3924#[inline]
3925#[target_feature(enable = "avx512f,avx512vl")]
3926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3927#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3929pub const fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3930 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_pd(a, b, c), no:_mm_setzero_pd()) }
3931}
3932
3933/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3934///
3935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3936#[inline]
3937#[target_feature(enable = "avx512f,avx512vl")]
3938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3939#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3941pub const fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3942 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_pd(a, b, c), no:c) }
3943}
3944
3945/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3946///
3947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3948#[inline]
3949#[target_feature(enable = "avx512f")]
3950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3951#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3953pub const fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3954 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
3955}
3956
3957/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3958///
3959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3960#[inline]
3961#[target_feature(enable = "avx512f")]
3962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3963#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3965pub const fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3966 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ps(a, b, c), no:a) }
3967}
3968
3969/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3970///
3971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3972#[inline]
3973#[target_feature(enable = "avx512f")]
3974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3975#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3977pub const fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3978 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ps(a, b, c), no:_mm512_setzero_ps()) }
3979}
3980
3981/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3982///
3983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3984#[inline]
3985#[target_feature(enable = "avx512f")]
3986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3987#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3989pub const fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3990 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ps(a, b, c), no:c) }
3991}
3992
3993/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3994///
3995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3996#[inline]
3997#[target_feature(enable = "avx512f,avx512vl")]
3998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3999#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4001pub const fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4002 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ps(a, b, c), no:a) }
4003}
4004
4005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4006///
4007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
4008#[inline]
4009#[target_feature(enable = "avx512f,avx512vl")]
4010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4011#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4013pub const fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4014 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4015}
4016
4017/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4018///
4019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
4020#[inline]
4021#[target_feature(enable = "avx512f,avx512vl")]
4022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4023#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4025pub const fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4026 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ps(a, b, c), no:c) }
4027}
4028
4029/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4030///
4031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
4032#[inline]
4033#[target_feature(enable = "avx512f,avx512vl")]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4035#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4037pub const fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4038 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ps(a, b, c), no:a) }
4039}
4040
4041/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4042///
4043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
4044#[inline]
4045#[target_feature(enable = "avx512f,avx512vl")]
4046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4047#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4049pub const fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4050 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ps(a, b, c), no:_mm_setzero_ps()) }
4051}
4052
4053/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4054///
4055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
4056#[inline]
4057#[target_feature(enable = "avx512f,avx512vl")]
4058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4059#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4061pub const fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4062 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ps(a, b, c), no:c) }
4063}
4064
4065/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
4066///
4067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
4068#[inline]
4069#[target_feature(enable = "avx512f")]
4070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4071#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4073pub const fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4074 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
4075}
4076
4077/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4078///
4079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
4080#[inline]
4081#[target_feature(enable = "avx512f")]
4082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4083#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4085pub const fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4086 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_pd(a, b, c), no:a) }
4087}
4088
4089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4090///
4091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
4092#[inline]
4093#[target_feature(enable = "avx512f")]
4094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4095#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4097pub const fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4098 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_pd(a, b, c), no:_mm512_setzero_pd()) }
4099}
4100
4101/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4102///
4103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
4104#[inline]
4105#[target_feature(enable = "avx512f")]
4106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4107#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4109pub const fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4110 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_pd(a, b, c), no:c) }
4111}
4112
4113/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4114///
4115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
4116#[inline]
4117#[target_feature(enable = "avx512f,avx512vl")]
4118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4119#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4121pub const fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4122 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_pd(a, b, c), no:a) }
4123}
4124
4125/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4126///
4127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
4128#[inline]
4129#[target_feature(enable = "avx512f,avx512vl")]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4131#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4133pub const fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4134 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_pd(a, b, c), no:_mm256_setzero_pd()) }
4135}
4136
4137/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4138///
4139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
4140#[inline]
4141#[target_feature(enable = "avx512f,avx512vl")]
4142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4143#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4145pub const fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4146 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_pd(a, b, c), no:c) }
4147}
4148
4149/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4150///
4151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
4152#[inline]
4153#[target_feature(enable = "avx512f,avx512vl")]
4154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4155#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4157pub const fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4158 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_pd(a, b, c), no:a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4169pub const fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4170 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_pd(a, b, c), no:_mm_setzero_pd()) }
4171}
4172
4173/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4174///
4175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
4176#[inline]
4177#[target_feature(enable = "avx512f,avx512vl")]
4178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4179#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4181pub const fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4182 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_pd(a, b, c), no:c) }
4183}
4184
4185/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4186///
4187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
4188#[inline]
4189#[target_feature(enable = "avx512f")]
4190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4191#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4193pub const fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4194 unsafe {
4195 let add: __m512 = simd_fma(x:a, y:b, z:c);
4196 let sub: __m512 = simd_fma(x:a, y:b, z:simd_neg(c));
4197 simd_shuffle!(
4198 add,
4199 sub,
4200 [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
4201 )
4202 }
4203}
4204
4205/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4208#[inline]
4209#[target_feature(enable = "avx512f")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4213pub const fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4214 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ps(a, b, c), no:a) }
4215}
4216
4217/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4218///
4219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4220#[inline]
4221#[target_feature(enable = "avx512f")]
4222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4223#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4225pub const fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4226 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ps(a, b, c), no:_mm512_setzero_ps()) }
4227}
4228
4229/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4230///
4231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4232#[inline]
4233#[target_feature(enable = "avx512f")]
4234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4235#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4236#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4237pub const fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4238 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ps(a, b, c), no:c) }
4239}
4240
4241/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4242///
4243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4244#[inline]
4245#[target_feature(enable = "avx512f,avx512vl")]
4246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4247#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4249pub const fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4250 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:a) }
4251}
4252
4253/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4254///
4255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4256#[inline]
4257#[target_feature(enable = "avx512f,avx512vl")]
4258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4259#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4261pub const fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4262 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4263}
4264
4265/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4266///
4267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4268#[inline]
4269#[target_feature(enable = "avx512f,avx512vl")]
4270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4271#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4273pub const fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4274 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:c) }
4275}
4276
4277/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4278///
4279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4280#[inline]
4281#[target_feature(enable = "avx512f,avx512vl")]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4283#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4285pub const fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4286 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:a) }
4287}
4288
4289/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4290///
4291/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ps&expand=2606)
4292#[inline]
4293#[target_feature(enable = "avx512f,avx512vl")]
4294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4295#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4297pub const fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4298 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:_mm_setzero_ps()) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4308#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4309pub const fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4310 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:c) }
4311}
4312
4313/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4314///
4315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
4316#[inline]
4317#[target_feature(enable = "avx512f")]
4318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4319#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4320#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4321pub const fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4322 unsafe {
4323 let add: __m512d = simd_fma(x:a, y:b, z:c);
4324 let sub: __m512d = simd_fma(x:a, y:b, z:simd_neg(c));
4325 simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4326 }
4327}
4328
4329/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4330///
4331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4332#[inline]
4333#[target_feature(enable = "avx512f")]
4334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4335#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4337pub const fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4338 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_pd(a, b, c), no:a) }
4339}
4340
4341/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4342///
4343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4344#[inline]
4345#[target_feature(enable = "avx512f")]
4346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4347#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4349pub const fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4350 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_pd(a, b, c), no:_mm512_setzero_pd()) }
4351}
4352
4353/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4354///
4355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
4356#[inline]
4357#[target_feature(enable = "avx512f")]
4358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4359#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4361pub const fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4362 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_pd(a, b, c), no:c) }
4363}
4364
4365/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4366///
4367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4368#[inline]
4369#[target_feature(enable = "avx512f,avx512vl")]
4370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4371#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4373pub const fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4374 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_pd(a, b, c), no:a) }
4375}
4376
4377/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4378///
4379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4380#[inline]
4381#[target_feature(enable = "avx512f,avx512vl")]
4382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4383#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4385pub const fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4386 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_pd(a, b, c), no:_mm256_setzero_pd()) }
4387}
4388
4389/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4390///
4391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4392#[inline]
4393#[target_feature(enable = "avx512f,avx512vl")]
4394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4395#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4397pub const fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4398 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_pd(a, b, c), no:c) }
4399}
4400
4401/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4402///
4403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4404#[inline]
4405#[target_feature(enable = "avx512f,avx512vl")]
4406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4407#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4408#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4409pub const fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4410 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_pd(a, b, c), no:a) }
4411}
4412
4413/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4414///
4415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4416#[inline]
4417#[target_feature(enable = "avx512f,avx512vl")]
4418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4419#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4421pub const fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4422 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_pd(a, b, c), no:_mm_setzero_pd()) }
4423}
4424
4425/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4426///
4427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4428#[inline]
4429#[target_feature(enable = "avx512f,avx512vl")]
4430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4431#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4433pub const fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4434 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_pd(a, b, c), no:c) }
4435}
4436
4437/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
4440#[inline]
4441#[target_feature(enable = "avx512f")]
4442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4445pub const fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4446 unsafe {
4447 let add: __m512 = simd_fma(x:a, y:b, z:c);
4448 let sub: __m512 = simd_fma(x:a, y:b, z:simd_neg(c));
4449 simd_shuffle!(
4450 add,
4451 sub,
4452 [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4453 )
4454 }
4455}
4456
4457/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4458///
4459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4460#[inline]
4461#[target_feature(enable = "avx512f")]
4462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4463#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4464#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4465pub const fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4466 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ps(a, b, c), no:a) }
4467}
4468
4469/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4470///
4471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4472#[inline]
4473#[target_feature(enable = "avx512f")]
4474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4475#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4477pub const fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ps(a, b, c), no:_mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4487#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4489pub const fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4490 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ps(a, b, c), no:c) }
4491}
4492
4493/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4494///
4495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4496#[inline]
4497#[target_feature(enable = "avx512f,avx512vl")]
4498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4499#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4502 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ps(a, b, c), no:a) }
4503}
4504
4505/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4506///
4507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4508#[inline]
4509#[target_feature(enable = "avx512f,avx512vl")]
4510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4511#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4513pub const fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4514 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ps(a, b, c), no:_mm256_setzero_ps()) }
4515}
4516
4517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4518///
4519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4520#[inline]
4521#[target_feature(enable = "avx512f,avx512vl")]
4522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4523#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4525pub const fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4526 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ps(a, b, c), no:c) }
4527}
4528
4529/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4530///
4531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4532#[inline]
4533#[target_feature(enable = "avx512f,avx512vl")]
4534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4535#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4538 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ps(a, b, c), no:a) }
4539}
4540
4541/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4542///
4543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4544#[inline]
4545#[target_feature(enable = "avx512f,avx512vl")]
4546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4547#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4549pub const fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4550 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ps(a, b, c), no:_mm_setzero_ps()) }
4551}
4552
4553/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4556#[inline]
4557#[target_feature(enable = "avx512f,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4561pub const fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4562 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ps(a, b, c), no:c) }
4563}
4564
4565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4566///
4567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4568#[inline]
4569#[target_feature(enable = "avx512f")]
4570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4571#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4573pub const fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4574 unsafe {
4575 let add: __m512d = simd_fma(x:a, y:b, z:c);
4576 let sub: __m512d = simd_fma(x:a, y:b, z:simd_neg(c));
4577 simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4578 }
4579}
4580
4581/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4584#[inline]
4585#[target_feature(enable = "avx512f")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4589pub const fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4590 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_pd(a, b, c), no:a) }
4591}
4592
4593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4594///
4595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4596#[inline]
4597#[target_feature(enable = "avx512f")]
4598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4599#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4601pub const fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4602 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_pd(a, b, c), no:_mm512_setzero_pd()) }
4603}
4604
4605/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4606///
4607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4608#[inline]
4609#[target_feature(enable = "avx512f")]
4610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4611#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4612#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4613pub const fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4614 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_pd(a, b, c), no:c) }
4615}
4616
4617/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4618///
4619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4620#[inline]
4621#[target_feature(enable = "avx512f,avx512vl")]
4622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4623#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4625pub const fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4626 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_pd(a, b, c), no:a) }
4627}
4628
4629/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4630///
4631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4632#[inline]
4633#[target_feature(enable = "avx512f,avx512vl")]
4634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4635#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4637pub const fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4638 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_pd(a, b, c), no:_mm256_setzero_pd()) }
4639}
4640
4641/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4642///
4643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4644#[inline]
4645#[target_feature(enable = "avx512f,avx512vl")]
4646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4647#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4649pub const fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4650 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_pd(a, b, c), no:c) }
4651}
4652
4653/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4654///
4655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4656#[inline]
4657#[target_feature(enable = "avx512f,avx512vl")]
4658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4659#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4660#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4661pub const fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4662 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_pd(a, b, c), no:a) }
4663}
4664
4665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4666///
4667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4668#[inline]
4669#[target_feature(enable = "avx512f,avx512vl")]
4670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4671#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4673pub const fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4674 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_pd(a, b, c), no:_mm_setzero_pd()) }
4675}
4676
4677/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4678///
4679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4680#[inline]
4681#[target_feature(enable = "avx512f,avx512vl")]
4682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4683#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4685pub const fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4686 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_pd(a, b, c), no:c) }
4687}
4688
4689/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4690///
4691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4692#[inline]
4693#[target_feature(enable = "avx512f")]
4694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4695#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4698 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4709pub const fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4710 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ps(a, b, c), no:a) }
4711}
4712
4713/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4714///
4715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4716#[inline]
4717#[target_feature(enable = "avx512f")]
4718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4719#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4721pub const fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4722 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ps(a, b, c), no:_mm512_setzero_ps()) }
4723}
4724
4725/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4726///
4727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4728#[inline]
4729#[target_feature(enable = "avx512f")]
4730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4731#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4733pub const fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4734 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ps(a, b, c), no:c) }
4735}
4736
4737/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4738///
4739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4740#[inline]
4741#[target_feature(enable = "avx512f,avx512vl")]
4742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4743#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4745pub const fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4746 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ps(a, b, c), no:a) }
4747}
4748
4749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4750///
4751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4752#[inline]
4753#[target_feature(enable = "avx512f,avx512vl")]
4754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4755#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4757pub const fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4758 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ps(a, b, c), no:_mm256_setzero_ps()) }
4759}
4760
4761/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4762///
4763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4764#[inline]
4765#[target_feature(enable = "avx512f,avx512vl")]
4766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4767#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4769pub const fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4770 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ps(a, b, c), no:c) }
4771}
4772
4773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4774///
4775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4776#[inline]
4777#[target_feature(enable = "avx512f,avx512vl")]
4778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4779#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4781pub const fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4782 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ps(a, b, c), no:a) }
4783}
4784
4785/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4786///
4787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4788#[inline]
4789#[target_feature(enable = "avx512f,avx512vl")]
4790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4791#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4793pub const fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4794 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ps(a, b, c), no:_mm_setzero_ps()) }
4795}
4796
4797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4798///
4799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4800#[inline]
4801#[target_feature(enable = "avx512f,avx512vl")]
4802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4803#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4805pub const fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4806 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ps(a, b, c), no:c) }
4807}
4808
4809/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4810///
4811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4812#[inline]
4813#[target_feature(enable = "avx512f")]
4814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4815#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4817pub const fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4818 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
4819}
4820
4821/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4822///
4823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4824#[inline]
4825#[target_feature(enable = "avx512f")]
4826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4827#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4829pub const fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4830 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_pd(a, b, c), no:a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4836#[inline]
4837#[target_feature(enable = "avx512f")]
4838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4839#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4841pub const fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4842 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_pd(a, b, c), no:_mm512_setzero_pd()) }
4843}
4844
4845/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4846///
4847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4848#[inline]
4849#[target_feature(enable = "avx512f")]
4850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4851#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4853pub const fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4854 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_pd(a, b, c), no:c) }
4855}
4856
4857/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4858///
4859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4860#[inline]
4861#[target_feature(enable = "avx512f,avx512vl")]
4862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4863#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4865pub const fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4866 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_pd(a, b, c), no:a) }
4867}
4868
4869/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4870///
4871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4872#[inline]
4873#[target_feature(enable = "avx512f,avx512vl")]
4874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4875#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4877pub const fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4878 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_pd(a, b, c), no:_mm256_setzero_pd()) }
4879}
4880
4881/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4882///
4883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4884#[inline]
4885#[target_feature(enable = "avx512f,avx512vl")]
4886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4887#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4889pub const fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4890 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_pd(a, b, c), no:c) }
4891}
4892
4893/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4894///
4895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4896#[inline]
4897#[target_feature(enable = "avx512f,avx512vl")]
4898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4899#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4901pub const fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4902 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_pd(a, b, c), no:a) }
4903}
4904
4905/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4906///
4907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4908#[inline]
4909#[target_feature(enable = "avx512f,avx512vl")]
4910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4911#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4913pub const fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4914 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_pd(a, b, c), no:_mm_setzero_pd()) }
4915}
4916
4917/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4918///
4919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4920#[inline]
4921#[target_feature(enable = "avx512f,avx512vl")]
4922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4923#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4925pub const fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4926 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_pd(a, b, c), no:c) }
4927}
4928
4929/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4930///
4931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4932#[inline]
4933#[target_feature(enable = "avx512f")]
4934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4935#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4937pub const fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4938 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
4939}
4940
4941/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4942///
4943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4944#[inline]
4945#[target_feature(enable = "avx512f")]
4946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4947#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4949pub const fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4950 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:a) }
4951}
4952
4953/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4954///
4955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4956#[inline]
4957#[target_feature(enable = "avx512f")]
4958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4959#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4961pub const fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4962 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:_mm512_setzero_ps()) }
4963}
4964
4965/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4968#[inline]
4969#[target_feature(enable = "avx512f")]
4970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4971#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4973pub const fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4974 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:c) }
4975}
4976
4977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4978///
4979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4980#[inline]
4981#[target_feature(enable = "avx512f,avx512vl")]
4982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4983#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4984#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4985pub const fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4986 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:a) }
4987}
4988
4989/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4990///
4991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4992#[inline]
4993#[target_feature(enable = "avx512f,avx512vl")]
4994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4995#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4997pub const fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4998 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4999}
5000
5001/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5002///
5003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
5004#[inline]
5005#[target_feature(enable = "avx512f,avx512vl")]
5006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5007#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
5010 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:c) }
5011}
5012
5013/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5014///
5015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
5016#[inline]
5017#[target_feature(enable = "avx512f,avx512vl")]
5018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5019#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5021pub const fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
5022 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:a) }
5023}
5024
5025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5026///
5027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
5028#[inline]
5029#[target_feature(enable = "avx512f,avx512vl")]
5030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5031#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5033pub const fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
5034 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:_mm_setzero_ps()) }
5035}
5036
5037/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5038///
5039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
5040#[inline]
5041#[target_feature(enable = "avx512f,avx512vl")]
5042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5043#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5045pub const fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
5046 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:c) }
5047}
5048
5049/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
5050///
5051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
5052#[inline]
5053#[target_feature(enable = "avx512f")]
5054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5055#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5057pub const fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5058 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
5059}
5060
5061/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5062///
5063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
5064#[inline]
5065#[target_feature(enable = "avx512f")]
5066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5067#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5069pub const fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
5070 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:a) }
5071}
5072
5073/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5074///
5075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
5076#[inline]
5077#[target_feature(enable = "avx512f")]
5078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5079#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5081pub const fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5082 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:_mm512_setzero_pd()) }
5083}
5084
5085/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5086///
5087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
5088#[inline]
5089#[target_feature(enable = "avx512f")]
5090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5091#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5093pub const fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
5094 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:c) }
5095}
5096
5097/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
5100#[inline]
5101#[target_feature(enable = "avx512f,avx512vl")]
5102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5103#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5105pub const fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
5106 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_pd(a, b, c), no:a) }
5107}
5108
5109/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5110///
5111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
5112#[inline]
5113#[target_feature(enable = "avx512f,avx512vl")]
5114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5115#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5117pub const fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
5118 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_pd(a, b, c), no:_mm256_setzero_pd()) }
5119}
5120
5121/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5122///
5123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
5124#[inline]
5125#[target_feature(enable = "avx512f,avx512vl")]
5126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5127#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5129pub const fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
5130 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_pd(a, b, c), no:c) }
5131}
5132
5133/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5134///
5135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
5136#[inline]
5137#[target_feature(enable = "avx512f,avx512vl")]
5138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5139#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5141pub const fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
5142 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_pd(a, b, c), no:a) }
5143}
5144
5145/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5146///
5147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
5148#[inline]
5149#[target_feature(enable = "avx512f,avx512vl")]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5153pub const fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
5154 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_pd(a, b, c), no:_mm_setzero_pd()) }
5155}
5156
5157/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5158///
5159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
5160#[inline]
5161#[target_feature(enable = "avx512f,avx512vl")]
5162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5163#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5164#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5165pub const fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
5166 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_pd(a, b, c), no:c) }
5167}
5168
5169/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5170///
5171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
5172#[inline]
5173#[target_feature(enable = "avx512f")]
5174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5175#[cfg_attr(test, assert_instr(vrcp14ps))]
5176pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
5177 unsafe { transmute(src:vrcp14ps(a.as_f32x16(), src:f32x16::ZERO, m:0b11111111_11111111)) }
5178}
5179
5180/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5181///
5182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
5183#[inline]
5184#[target_feature(enable = "avx512f")]
5185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5186#[cfg_attr(test, assert_instr(vrcp14ps))]
5187pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5188 unsafe { transmute(src:vrcp14ps(a.as_f32x16(), src.as_f32x16(), m:k)) }
5189}
5190
5191/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5192///
5193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
5194#[inline]
5195#[target_feature(enable = "avx512f")]
5196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5197#[cfg_attr(test, assert_instr(vrcp14ps))]
5198pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
5199 unsafe { transmute(src:vrcp14ps(a.as_f32x16(), src:f32x16::ZERO, m:k)) }
5200}
5201
5202/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5203///
5204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
5205#[inline]
5206#[target_feature(enable = "avx512f,avx512vl")]
5207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5208#[cfg_attr(test, assert_instr(vrcp14ps))]
5209pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
5210 unsafe { transmute(src:vrcp14ps256(a.as_f32x8(), src:f32x8::ZERO, m:0b11111111)) }
5211}
5212
5213/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5214///
5215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
5216#[inline]
5217#[target_feature(enable = "avx512f,avx512vl")]
5218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5219#[cfg_attr(test, assert_instr(vrcp14ps))]
5220pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5221 unsafe { transmute(src:vrcp14ps256(a.as_f32x8(), src.as_f32x8(), m:k)) }
5222}
5223
5224/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5225///
5226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
5227#[inline]
5228#[target_feature(enable = "avx512f,avx512vl")]
5229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5230#[cfg_attr(test, assert_instr(vrcp14ps))]
5231pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
5232 unsafe { transmute(src:vrcp14ps256(a.as_f32x8(), src:f32x8::ZERO, m:k)) }
5233}
5234
5235/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5236///
5237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
5238#[inline]
5239#[target_feature(enable = "avx512f,avx512vl")]
5240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5241#[cfg_attr(test, assert_instr(vrcp14ps))]
5242pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
5243 unsafe { transmute(src:vrcp14ps128(a.as_f32x4(), src:f32x4::ZERO, m:0b00001111)) }
5244}
5245
5246/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5247///
5248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
5249#[inline]
5250#[target_feature(enable = "avx512f,avx512vl")]
5251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5252#[cfg_attr(test, assert_instr(vrcp14ps))]
5253pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5254 unsafe { transmute(src:vrcp14ps128(a.as_f32x4(), src.as_f32x4(), m:k)) }
5255}
5256
5257/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5258///
5259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
5260#[inline]
5261#[target_feature(enable = "avx512f,avx512vl")]
5262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5263#[cfg_attr(test, assert_instr(vrcp14ps))]
5264pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
5265 unsafe { transmute(src:vrcp14ps128(a.as_f32x4(), src:f32x4::ZERO, m:k)) }
5266}
5267
5268/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5269///
5270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
5271#[inline]
5272#[target_feature(enable = "avx512f")]
5273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5274#[cfg_attr(test, assert_instr(vrcp14pd))]
5275pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
5276 unsafe { transmute(src:vrcp14pd(a.as_f64x8(), src:f64x8::ZERO, m:0b11111111)) }
5277}
5278
5279/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5280///
5281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5282#[inline]
5283#[target_feature(enable = "avx512f")]
5284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5285#[cfg_attr(test, assert_instr(vrcp14pd))]
5286pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5287 unsafe { transmute(src:vrcp14pd(a.as_f64x8(), src.as_f64x8(), m:k)) }
5288}
5289
5290/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5293#[inline]
5294#[target_feature(enable = "avx512f")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296#[cfg_attr(test, assert_instr(vrcp14pd))]
5297pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5298 unsafe { transmute(src:vrcp14pd(a.as_f64x8(), src:f64x8::ZERO, m:k)) }
5299}
5300
5301/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5302///
5303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5304#[inline]
5305#[target_feature(enable = "avx512f,avx512vl")]
5306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5307#[cfg_attr(test, assert_instr(vrcp14pd))]
5308pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5309 unsafe { transmute(src:vrcp14pd256(a.as_f64x4(), src:f64x4::ZERO, m:0b00001111)) }
5310}
5311
5312/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5313///
5314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5315#[inline]
5316#[target_feature(enable = "avx512f,avx512vl")]
5317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5318#[cfg_attr(test, assert_instr(vrcp14pd))]
5319pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5320 unsafe { transmute(src:vrcp14pd256(a.as_f64x4(), src.as_f64x4(), m:k)) }
5321}
5322
5323/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5324///
5325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5326#[inline]
5327#[target_feature(enable = "avx512f,avx512vl")]
5328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5329#[cfg_attr(test, assert_instr(vrcp14pd))]
5330pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5331 unsafe { transmute(src:vrcp14pd256(a.as_f64x4(), src:f64x4::ZERO, m:k)) }
5332}
5333
5334/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5335///
5336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5337#[inline]
5338#[target_feature(enable = "avx512f,avx512vl")]
5339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5340#[cfg_attr(test, assert_instr(vrcp14pd))]
5341pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5342 unsafe { transmute(src:vrcp14pd128(a.as_f64x2(), src:f64x2::ZERO, m:0b00000011)) }
5343}
5344
5345/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5346///
5347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5348#[inline]
5349#[target_feature(enable = "avx512f,avx512vl")]
5350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5351#[cfg_attr(test, assert_instr(vrcp14pd))]
5352pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5353 unsafe { transmute(src:vrcp14pd128(a.as_f64x2(), src.as_f64x2(), m:k)) }
5354}
5355
5356/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5357///
5358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5359#[inline]
5360#[target_feature(enable = "avx512f,avx512vl")]
5361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5362#[cfg_attr(test, assert_instr(vrcp14pd))]
5363pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5364 unsafe { transmute(src:vrcp14pd128(a.as_f64x2(), src:f64x2::ZERO, m:k)) }
5365}
5366
5367/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5368///
5369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5370#[inline]
5371#[target_feature(enable = "avx512f")]
5372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5373#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5374pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5375 unsafe { transmute(src:vrsqrt14ps(a.as_f32x16(), src:f32x16::ZERO, m:0b11111111_11111111)) }
5376}
5377
5378/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5379///
5380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5381#[inline]
5382#[target_feature(enable = "avx512f")]
5383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5384#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5385pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5386 unsafe { transmute(src:vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), m:k)) }
5387}
5388
5389/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5390///
5391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5392#[inline]
5393#[target_feature(enable = "avx512f")]
5394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5395#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5396pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5397 unsafe { transmute(src:vrsqrt14ps(a.as_f32x16(), src:f32x16::ZERO, m:k)) }
5398}
5399
5400/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5401///
5402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5403#[inline]
5404#[target_feature(enable = "avx512f,avx512vl")]
5405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5406#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5407pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5408 unsafe { transmute(src:vrsqrt14ps256(a.as_f32x8(), src:f32x8::ZERO, m:0b11111111)) }
5409}
5410
5411/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5412///
5413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5414#[inline]
5415#[target_feature(enable = "avx512f,avx512vl")]
5416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5417#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5418pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5419 unsafe { transmute(src:vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), m:k)) }
5420}
5421
5422/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5425#[inline]
5426#[target_feature(enable = "avx512f,avx512vl")]
5427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5428#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5429pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5430 unsafe { transmute(src:vrsqrt14ps256(a.as_f32x8(), src:f32x8::ZERO, m:k)) }
5431}
5432
5433/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5434///
5435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5436#[inline]
5437#[target_feature(enable = "avx512f,avx512vl")]
5438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5439#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5440pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5441 unsafe { transmute(src:vrsqrt14ps128(a.as_f32x4(), src:f32x4::ZERO, m:0b00001111)) }
5442}
5443
5444/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5445///
5446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5447#[inline]
5448#[target_feature(enable = "avx512f,avx512vl")]
5449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5450#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5451pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5452 unsafe { transmute(src:vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), m:k)) }
5453}
5454
5455/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5456///
5457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5458#[inline]
5459#[target_feature(enable = "avx512f,avx512vl")]
5460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5461#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5462pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5463 unsafe { transmute(src:vrsqrt14ps128(a.as_f32x4(), src:f32x4::ZERO, m:k)) }
5464}
5465
5466/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5467///
5468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5469#[inline]
5470#[target_feature(enable = "avx512f")]
5471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5472#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5473pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5474 unsafe { transmute(src:vrsqrt14pd(a.as_f64x8(), src:f64x8::ZERO, m:0b11111111)) }
5475}
5476
5477/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5478///
5479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5480#[inline]
5481#[target_feature(enable = "avx512f")]
5482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5483#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5484pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5485 unsafe { transmute(src:vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), m:k)) }
5486}
5487
5488/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5489///
5490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5491#[inline]
5492#[target_feature(enable = "avx512f")]
5493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5494#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5495pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5496 unsafe { transmute(src:vrsqrt14pd(a.as_f64x8(), src:f64x8::ZERO, m:k)) }
5497}
5498
5499/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5500///
5501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5502#[inline]
5503#[target_feature(enable = "avx512f,avx512vl")]
5504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5505#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5506pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5507 unsafe { transmute(src:vrsqrt14pd256(a.as_f64x4(), src:f64x4::ZERO, m:0b00001111)) }
5508}
5509
5510/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5511///
5512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5513#[inline]
5514#[target_feature(enable = "avx512f,avx512vl")]
5515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5516#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5517pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5518 unsafe { transmute(src:vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), m:k)) }
5519}
5520
5521/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5522///
5523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5524#[inline]
5525#[target_feature(enable = "avx512f,avx512vl")]
5526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5527#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5528pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5529 unsafe { transmute(src:vrsqrt14pd256(a.as_f64x4(), src:f64x4::ZERO, m:k)) }
5530}
5531
5532/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5533///
5534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5535#[inline]
5536#[target_feature(enable = "avx512f,avx512vl")]
5537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5538#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5539pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5540 unsafe { transmute(src:vrsqrt14pd128(a.as_f64x2(), src:f64x2::ZERO, m:0b00000011)) }
5541}
5542
5543/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5544///
5545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5546#[inline]
5547#[target_feature(enable = "avx512f,avx512vl")]
5548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5549#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5550pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5551 unsafe { transmute(src:vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), m:k)) }
5552}
5553
5554/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5555///
5556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5557#[inline]
5558#[target_feature(enable = "avx512f,avx512vl")]
5559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5560#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5561pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5562 unsafe { transmute(src:vrsqrt14pd128(a.as_f64x2(), src:f64x2::ZERO, m:k)) }
5563}
5564
5565/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5566///
5567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5568#[inline]
5569#[target_feature(enable = "avx512f")]
5570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5571#[cfg_attr(test, assert_instr(vgetexpps))]
5572pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5573 unsafe {
5574 transmute(src:vgetexpps(
5575 a.as_f32x16(),
5576 src:f32x16::ZERO,
5577 m:0b11111111_11111111,
5578 _MM_FROUND_CUR_DIRECTION,
5579 ))
5580 }
5581}
5582
5583/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[cfg_attr(test, assert_instr(vgetexpps))]
5590pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5591 unsafe {
5592 transmute(src:vgetexpps(
5593 a.as_f32x16(),
5594 src.as_f32x16(),
5595 m:k,
5596 _MM_FROUND_CUR_DIRECTION,
5597 ))
5598 }
5599}
5600
5601/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5602///
5603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5604#[inline]
5605#[target_feature(enable = "avx512f")]
5606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5607#[cfg_attr(test, assert_instr(vgetexpps))]
5608pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5609 unsafe {
5610 transmute(src:vgetexpps(
5611 a.as_f32x16(),
5612 src:f32x16::ZERO,
5613 m:k,
5614 _MM_FROUND_CUR_DIRECTION,
5615 ))
5616 }
5617}
5618
5619/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5620///
5621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5622#[inline]
5623#[target_feature(enable = "avx512f,avx512vl")]
5624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5625#[cfg_attr(test, assert_instr(vgetexpps))]
5626pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5627 unsafe { transmute(src:vgetexpps256(a.as_f32x8(), src:f32x8::ZERO, m:0b11111111)) }
5628}
5629
5630/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5631///
5632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5633#[inline]
5634#[target_feature(enable = "avx512f,avx512vl")]
5635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5636#[cfg_attr(test, assert_instr(vgetexpps))]
5637pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638 unsafe { transmute(src:vgetexpps256(a.as_f32x8(), src.as_f32x8(), m:k)) }
5639}
5640
5641/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5642///
5643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5644#[inline]
5645#[target_feature(enable = "avx512f,avx512vl")]
5646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5647#[cfg_attr(test, assert_instr(vgetexpps))]
5648pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5649 unsafe { transmute(src:vgetexpps256(a.as_f32x8(), src:f32x8::ZERO, m:k)) }
5650}
5651
5652/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5653///
5654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5655#[inline]
5656#[target_feature(enable = "avx512f,avx512vl")]
5657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5658#[cfg_attr(test, assert_instr(vgetexpps))]
5659pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5660 unsafe { transmute(src:vgetexpps128(a.as_f32x4(), src:f32x4::ZERO, m:0b00001111)) }
5661}
5662
5663/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5664///
5665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5666#[inline]
5667#[target_feature(enable = "avx512f,avx512vl")]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669#[cfg_attr(test, assert_instr(vgetexpps))]
5670pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5671 unsafe { transmute(src:vgetexpps128(a.as_f32x4(), src.as_f32x4(), m:k)) }
5672}
5673
5674/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5675///
5676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5677#[inline]
5678#[target_feature(enable = "avx512f,avx512vl")]
5679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5680#[cfg_attr(test, assert_instr(vgetexpps))]
5681pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5682 unsafe { transmute(src:vgetexpps128(a.as_f32x4(), src:f32x4::ZERO, m:k)) }
5683}
5684
5685/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5686///
5687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5688#[inline]
5689#[target_feature(enable = "avx512f")]
5690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5691#[cfg_attr(test, assert_instr(vgetexppd))]
5692pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5693 unsafe {
5694 transmute(src:vgetexppd(
5695 a.as_f64x8(),
5696 src:f64x8::ZERO,
5697 m:0b11111111,
5698 _MM_FROUND_CUR_DIRECTION,
5699 ))
5700 }
5701}
5702
5703/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5704///
5705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5706#[inline]
5707#[target_feature(enable = "avx512f")]
5708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5709#[cfg_attr(test, assert_instr(vgetexppd))]
5710pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5711 unsafe {
5712 transmute(src:vgetexppd(
5713 a.as_f64x8(),
5714 src.as_f64x8(),
5715 m:k,
5716 _MM_FROUND_CUR_DIRECTION,
5717 ))
5718 }
5719}
5720
5721/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5722///
5723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5724#[inline]
5725#[target_feature(enable = "avx512f")]
5726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5727#[cfg_attr(test, assert_instr(vgetexppd))]
5728pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5729 unsafe {
5730 transmute(src:vgetexppd(
5731 a.as_f64x8(),
5732 src:f64x8::ZERO,
5733 m:k,
5734 _MM_FROUND_CUR_DIRECTION,
5735 ))
5736 }
5737}
5738
5739/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5740///
5741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5742#[inline]
5743#[target_feature(enable = "avx512f,avx512vl")]
5744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5745#[cfg_attr(test, assert_instr(vgetexppd))]
5746pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5747 unsafe { transmute(src:vgetexppd256(a.as_f64x4(), src:f64x4::ZERO, m:0b00001111)) }
5748}
5749
5750/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5751///
5752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5753#[inline]
5754#[target_feature(enable = "avx512f,avx512vl")]
5755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5756#[cfg_attr(test, assert_instr(vgetexppd))]
5757pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5758 unsafe { transmute(src:vgetexppd256(a.as_f64x4(), src.as_f64x4(), m:k)) }
5759}
5760
5761/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5762///
5763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5764#[inline]
5765#[target_feature(enable = "avx512f,avx512vl")]
5766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5767#[cfg_attr(test, assert_instr(vgetexppd))]
5768pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5769 unsafe { transmute(src:vgetexppd256(a.as_f64x4(), src:f64x4::ZERO, m:k)) }
5770}
5771
5772/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5773///
5774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5775#[inline]
5776#[target_feature(enable = "avx512f,avx512vl")]
5777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5778#[cfg_attr(test, assert_instr(vgetexppd))]
5779pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5780 unsafe { transmute(src:vgetexppd128(a.as_f64x2(), src:f64x2::ZERO, m:0b00000011)) }
5781}
5782
5783/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5784///
5785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5786#[inline]
5787#[target_feature(enable = "avx512f,avx512vl")]
5788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5789#[cfg_attr(test, assert_instr(vgetexppd))]
5790pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5791 unsafe { transmute(src:vgetexppd128(a.as_f64x2(), src.as_f64x2(), m:k)) }
5792}
5793
5794/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5795///
5796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5797#[inline]
5798#[target_feature(enable = "avx512f,avx512vl")]
5799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5800#[cfg_attr(test, assert_instr(vgetexppd))]
5801pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5802 unsafe { transmute(src:vgetexppd128(a.as_f64x2(), src:f64x2::ZERO, m:k)) }
5803}
5804
5805/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5806/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5807/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5808/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5809/// * [`_MM_FROUND_TO_POS_INF`] : round up
5810/// * [`_MM_FROUND_TO_ZERO`] : truncate
5811/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5812///
5813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5814#[inline]
5815#[target_feature(enable = "avx512f")]
5816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5817#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5818#[rustc_legacy_const_generics(1)]
5819pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5820 unsafe {
5821 static_assert_uimm_bits!(IMM8, 8);
5822 let a: Simd = a.as_f32x16();
5823 let r: Simd = vrndscaleps(
5824 a,
5825 IMM8,
5826 src:f32x16::ZERO,
5827 mask:0b11111111_11111111,
5828 _MM_FROUND_CUR_DIRECTION,
5829 );
5830 transmute(src:r)
5831 }
5832}
5833
5834/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5835/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5836/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5837/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5838/// * [`_MM_FROUND_TO_POS_INF`] : round up
5839/// * [`_MM_FROUND_TO_ZERO`] : truncate
5840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5841///
5842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5843#[inline]
5844#[target_feature(enable = "avx512f")]
5845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5846#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5847#[rustc_legacy_const_generics(3)]
5848pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5849 unsafe {
5850 static_assert_uimm_bits!(IMM8, 8);
5851 let a: Simd = a.as_f32x16();
5852 let src: Simd = src.as_f32x16();
5853 let r: Simd = vrndscaleps(a, IMM8, src, mask:k, _MM_FROUND_CUR_DIRECTION);
5854 transmute(src:r)
5855 }
5856}
5857
5858/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5859/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5860/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5861/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5862/// * [`_MM_FROUND_TO_POS_INF`] : round up
5863/// * [`_MM_FROUND_TO_ZERO`] : truncate
5864/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5865///
5866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5867#[inline]
5868#[target_feature(enable = "avx512f")]
5869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5870#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5871#[rustc_legacy_const_generics(2)]
5872pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5873 unsafe {
5874 static_assert_uimm_bits!(IMM8, 8);
5875 let a: Simd = a.as_f32x16();
5876 let r: Simd = vrndscaleps(a, IMM8, src:f32x16::ZERO, mask:k, _MM_FROUND_CUR_DIRECTION);
5877 transmute(src:r)
5878 }
5879}
5880
5881/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5882/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5883/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5884/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5885/// * [`_MM_FROUND_TO_POS_INF`] : round up
5886/// * [`_MM_FROUND_TO_ZERO`] : truncate
5887/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5888///
5889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5890#[inline]
5891#[target_feature(enable = "avx512f,avx512vl")]
5892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5893#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5894#[rustc_legacy_const_generics(1)]
5895pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5896 unsafe {
5897 static_assert_uimm_bits!(IMM8, 8);
5898 let a: Simd = a.as_f32x8();
5899 let r: Simd = vrndscaleps256(a, IMM8, src:f32x8::ZERO, mask:0b11111111);
5900 transmute(src:r)
5901 }
5902}
5903
5904/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5905/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5906/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5907/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5908/// * [`_MM_FROUND_TO_POS_INF`] : round up
5909/// * [`_MM_FROUND_TO_ZERO`] : truncate
5910/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5911///
5912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5913#[inline]
5914#[target_feature(enable = "avx512f,avx512vl")]
5915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5916#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5917#[rustc_legacy_const_generics(3)]
5918pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5919 unsafe {
5920 static_assert_uimm_bits!(IMM8, 8);
5921 let a: Simd = a.as_f32x8();
5922 let src: Simd = src.as_f32x8();
5923 let r: Simd = vrndscaleps256(a, IMM8, src, mask:k);
5924 transmute(src:r)
5925 }
5926}
5927
5928/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5929/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5930/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5931/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5932/// * [`_MM_FROUND_TO_POS_INF`] : round up
5933/// * [`_MM_FROUND_TO_ZERO`] : truncate
5934/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5935///
5936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5937#[inline]
5938#[target_feature(enable = "avx512f,avx512vl")]
5939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5940#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5941#[rustc_legacy_const_generics(2)]
5942pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5943 unsafe {
5944 static_assert_uimm_bits!(IMM8, 8);
5945 let a: Simd = a.as_f32x8();
5946 let r: Simd = vrndscaleps256(a, IMM8, src:f32x8::ZERO, mask:k);
5947 transmute(src:r)
5948 }
5949}
5950
5951/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5952/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5953/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5954/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5955/// * [`_MM_FROUND_TO_POS_INF`] : round up
5956/// * [`_MM_FROUND_TO_ZERO`] : truncate
5957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5958///
5959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5960#[inline]
5961#[target_feature(enable = "avx512f,avx512vl")]
5962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5963#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5964#[rustc_legacy_const_generics(1)]
5965pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5966 unsafe {
5967 static_assert_uimm_bits!(IMM8, 8);
5968 let a: Simd = a.as_f32x4();
5969 let r: Simd = vrndscaleps128(a, IMM8, src:f32x4::ZERO, mask:0b00001111);
5970 transmute(src:r)
5971 }
5972}
5973
5974/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5975/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5976/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5977/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5978/// * [`_MM_FROUND_TO_POS_INF`] : round up
5979/// * [`_MM_FROUND_TO_ZERO`] : truncate
5980/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5981///
5982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5983#[inline]
5984#[target_feature(enable = "avx512f,avx512vl")]
5985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5986#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5987#[rustc_legacy_const_generics(3)]
5988pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5989 unsafe {
5990 static_assert_uimm_bits!(IMM8, 8);
5991 let a: Simd = a.as_f32x4();
5992 let src: Simd = src.as_f32x4();
5993 let r: Simd = vrndscaleps128(a, IMM8, src, mask:k);
5994 transmute(src:r)
5995 }
5996}
5997
5998/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5999/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6000/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6001/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6002/// * [`_MM_FROUND_TO_POS_INF`] : round up
6003/// * [`_MM_FROUND_TO_ZERO`] : truncate
6004/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6005///
6006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
6007#[inline]
6008#[target_feature(enable = "avx512f,avx512vl")]
6009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6010#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
6011#[rustc_legacy_const_generics(2)]
6012pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6013 unsafe {
6014 static_assert_uimm_bits!(IMM8, 8);
6015 let a: Simd = a.as_f32x4();
6016 let r: Simd = vrndscaleps128(a, IMM8, src:f32x4::ZERO, mask:k);
6017 transmute(src:r)
6018 }
6019}
6020
6021/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6022/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6023/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6024/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6025/// * [`_MM_FROUND_TO_POS_INF`] : round up
6026/// * [`_MM_FROUND_TO_ZERO`] : truncate
6027/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6028///
6029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
6030#[inline]
6031#[target_feature(enable = "avx512f")]
6032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6033#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6034#[rustc_legacy_const_generics(1)]
6035pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6036 unsafe {
6037 static_assert_uimm_bits!(IMM8, 8);
6038 let a: Simd = a.as_f64x8();
6039 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
6040 transmute(src:r)
6041 }
6042}
6043
6044/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6045/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6046/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6047/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6048/// * [`_MM_FROUND_TO_POS_INF`] : round up
6049/// * [`_MM_FROUND_TO_ZERO`] : truncate
6050/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6051///
6052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
6053#[inline]
6054#[target_feature(enable = "avx512f")]
6055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6056#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6057#[rustc_legacy_const_generics(3)]
6058pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
6059 src: __m512d,
6060 k: __mmask8,
6061 a: __m512d,
6062) -> __m512d {
6063 unsafe {
6064 static_assert_uimm_bits!(IMM8, 8);
6065 let a: Simd = a.as_f64x8();
6066 let src: Simd = src.as_f64x8();
6067 let r: Simd = vrndscalepd(a, IMM8, src, mask:k, _MM_FROUND_CUR_DIRECTION);
6068 transmute(src:r)
6069 }
6070}
6071
6072/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6073/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6074/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6075/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6076/// * [`_MM_FROUND_TO_POS_INF`] : round up
6077/// * [`_MM_FROUND_TO_ZERO`] : truncate
6078/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6079///
6080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
6081#[inline]
6082#[target_feature(enable = "avx512f")]
6083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6084#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6085#[rustc_legacy_const_generics(2)]
6086pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6087 unsafe {
6088 static_assert_uimm_bits!(IMM8, 8);
6089 let a: Simd = a.as_f64x8();
6090 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:k, _MM_FROUND_CUR_DIRECTION);
6091 transmute(src:r)
6092 }
6093}
6094
6095/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6096/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6097/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6098/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6099/// * [`_MM_FROUND_TO_POS_INF`] : round up
6100/// * [`_MM_FROUND_TO_ZERO`] : truncate
6101/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6102///
6103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
6104#[inline]
6105#[target_feature(enable = "avx512f,avx512vl")]
6106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6107#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6108#[rustc_legacy_const_generics(1)]
6109pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6110 unsafe {
6111 static_assert_uimm_bits!(IMM8, 8);
6112 let a: Simd = a.as_f64x4();
6113 let r: Simd = vrndscalepd256(a, IMM8, src:f64x4::ZERO, mask:0b00001111);
6114 transmute(src:r)
6115 }
6116}
6117
6118/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6119/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6120/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6121/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6122/// * [`_MM_FROUND_TO_POS_INF`] : round up
6123/// * [`_MM_FROUND_TO_ZERO`] : truncate
6124/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6125///
6126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
6127#[inline]
6128#[target_feature(enable = "avx512f,avx512vl")]
6129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6130#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6131#[rustc_legacy_const_generics(3)]
6132pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
6133 src: __m256d,
6134 k: __mmask8,
6135 a: __m256d,
6136) -> __m256d {
6137 unsafe {
6138 static_assert_uimm_bits!(IMM8, 8);
6139 let a: Simd = a.as_f64x4();
6140 let src: Simd = src.as_f64x4();
6141 let r: Simd = vrndscalepd256(a, IMM8, src, mask:k);
6142 transmute(src:r)
6143 }
6144}
6145
6146/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6147/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6148/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6149/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6150/// * [`_MM_FROUND_TO_POS_INF`] : round up
6151/// * [`_MM_FROUND_TO_ZERO`] : truncate
6152/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6158#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6159#[rustc_legacy_const_generics(2)]
6160pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6161 unsafe {
6162 static_assert_uimm_bits!(IMM8, 8);
6163 let a: Simd = a.as_f64x4();
6164 let r: Simd = vrndscalepd256(a, IMM8, src:f64x4::ZERO, mask:k);
6165 transmute(src:r)
6166 }
6167}
6168
6169/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6170/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6171/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6172/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6173/// * [`_MM_FROUND_TO_POS_INF`] : round up
6174/// * [`_MM_FROUND_TO_ZERO`] : truncate
6175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6176///
6177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
6178#[inline]
6179#[target_feature(enable = "avx512f,avx512vl")]
6180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6181#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6182#[rustc_legacy_const_generics(1)]
6183pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6184 unsafe {
6185 static_assert_uimm_bits!(IMM8, 8);
6186 let a: Simd = a.as_f64x2();
6187 let r: Simd = vrndscalepd128(a, IMM8, src:f64x2::ZERO, mask:0b00000011);
6188 transmute(src:r)
6189 }
6190}
6191
6192/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6193/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6194/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6195/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6196/// * [`_MM_FROUND_TO_POS_INF`] : round up
6197/// * [`_MM_FROUND_TO_ZERO`] : truncate
6198/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6199///
6200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
6201#[inline]
6202#[target_feature(enable = "avx512f,avx512vl")]
6203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6204#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6205#[rustc_legacy_const_generics(3)]
6206pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6207 unsafe {
6208 static_assert_uimm_bits!(IMM8, 8);
6209 let a: Simd = a.as_f64x2();
6210 let src: Simd = src.as_f64x2();
6211 let r: Simd = vrndscalepd128(a, IMM8, src, mask:k);
6212 transmute(src:r)
6213 }
6214}
6215
6216/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6217/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6218/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6219/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6220/// * [`_MM_FROUND_TO_POS_INF`] : round up
6221/// * [`_MM_FROUND_TO_ZERO`] : truncate
6222/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6223///
6224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
6225#[inline]
6226#[target_feature(enable = "avx512f,avx512vl")]
6227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6228#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6229#[rustc_legacy_const_generics(2)]
6230pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6231 unsafe {
6232 static_assert_uimm_bits!(IMM8, 8);
6233 let a: Simd = a.as_f64x2();
6234 let r: Simd = vrndscalepd128(a, IMM8, src:f64x2::ZERO, mask:k);
6235 transmute(src:r)
6236 }
6237}
6238
6239/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6240///
6241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
6242#[inline]
6243#[target_feature(enable = "avx512f")]
6244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6245#[cfg_attr(test, assert_instr(vscalefps))]
6246pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
6247 unsafe {
6248 transmute(src:vscalefps(
6249 a.as_f32x16(),
6250 b.as_f32x16(),
6251 src:f32x16::ZERO,
6252 mask:0b11111111_11111111,
6253 _MM_FROUND_CUR_DIRECTION,
6254 ))
6255 }
6256}
6257
6258/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6259///
6260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
6261#[inline]
6262#[target_feature(enable = "avx512f")]
6263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6264#[cfg_attr(test, assert_instr(vscalefps))]
6265pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
6266 unsafe {
6267 transmute(src:vscalefps(
6268 a.as_f32x16(),
6269 b.as_f32x16(),
6270 src.as_f32x16(),
6271 mask:k,
6272 _MM_FROUND_CUR_DIRECTION,
6273 ))
6274 }
6275}
6276
6277/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6278///
6279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
6280#[inline]
6281#[target_feature(enable = "avx512f")]
6282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6283#[cfg_attr(test, assert_instr(vscalefps))]
6284pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6285 unsafe {
6286 transmute(src:vscalefps(
6287 a.as_f32x16(),
6288 b.as_f32x16(),
6289 src:f32x16::ZERO,
6290 mask:k,
6291 _MM_FROUND_CUR_DIRECTION,
6292 ))
6293 }
6294}
6295
6296/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6297///
6298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6299#[inline]
6300#[target_feature(enable = "avx512f,avx512vl")]
6301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6302#[cfg_attr(test, assert_instr(vscalefps))]
6303pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6304 unsafe {
6305 transmute(src:vscalefps256(
6306 a.as_f32x8(),
6307 b.as_f32x8(),
6308 src:f32x8::ZERO,
6309 mask:0b11111111,
6310 ))
6311 }
6312}
6313
6314/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6315///
6316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6317#[inline]
6318#[target_feature(enable = "avx512f,avx512vl")]
6319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6320#[cfg_attr(test, assert_instr(vscalefps))]
6321pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6322 unsafe { transmute(src:vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), mask:k)) }
6323}
6324
6325/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6326///
6327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6328#[inline]
6329#[target_feature(enable = "avx512f,avx512vl")]
6330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6331#[cfg_attr(test, assert_instr(vscalefps))]
6332pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6333 unsafe { transmute(src:vscalefps256(a.as_f32x8(), b.as_f32x8(), src:f32x8::ZERO, mask:k)) }
6334}
6335
6336/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6337///
6338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6339#[inline]
6340#[target_feature(enable = "avx512f,avx512vl")]
6341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6342#[cfg_attr(test, assert_instr(vscalefps))]
6343pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6344 unsafe {
6345 transmute(src:vscalefps128(
6346 a.as_f32x4(),
6347 b.as_f32x4(),
6348 src:f32x4::ZERO,
6349 mask:0b00001111,
6350 ))
6351 }
6352}
6353
6354/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6357#[inline]
6358#[target_feature(enable = "avx512f,avx512vl")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vscalefps))]
6361pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6362 unsafe { transmute(src:vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), mask:k)) }
6363}
6364
6365/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6366///
6367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6368#[inline]
6369#[target_feature(enable = "avx512f,avx512vl")]
6370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6371#[cfg_attr(test, assert_instr(vscalefps))]
6372pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6373 unsafe { transmute(src:vscalefps128(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:k)) }
6374}
6375
6376/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6377///
6378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6379#[inline]
6380#[target_feature(enable = "avx512f")]
6381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6382#[cfg_attr(test, assert_instr(vscalefpd))]
6383pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6384 unsafe {
6385 transmute(src:vscalefpd(
6386 a.as_f64x8(),
6387 b.as_f64x8(),
6388 src:f64x8::ZERO,
6389 mask:0b11111111,
6390 _MM_FROUND_CUR_DIRECTION,
6391 ))
6392 }
6393}
6394
6395/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6396///
6397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6398#[inline]
6399#[target_feature(enable = "avx512f")]
6400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6401#[cfg_attr(test, assert_instr(vscalefpd))]
6402pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6403 unsafe {
6404 transmute(src:vscalefpd(
6405 a.as_f64x8(),
6406 b.as_f64x8(),
6407 src.as_f64x8(),
6408 mask:k,
6409 _MM_FROUND_CUR_DIRECTION,
6410 ))
6411 }
6412}
6413
6414/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6415///
6416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6417#[inline]
6418#[target_feature(enable = "avx512f")]
6419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6420#[cfg_attr(test, assert_instr(vscalefpd))]
6421pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6422 unsafe {
6423 transmute(src:vscalefpd(
6424 a.as_f64x8(),
6425 b.as_f64x8(),
6426 src:f64x8::ZERO,
6427 mask:k,
6428 _MM_FROUND_CUR_DIRECTION,
6429 ))
6430 }
6431}
6432
6433/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6436#[inline]
6437#[target_feature(enable = "avx512f,avx512vl")]
6438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6439#[cfg_attr(test, assert_instr(vscalefpd))]
6440pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6441 unsafe {
6442 transmute(src:vscalefpd256(
6443 a.as_f64x4(),
6444 b.as_f64x4(),
6445 src:f64x4::ZERO,
6446 mask:0b00001111,
6447 ))
6448 }
6449}
6450
6451/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6452///
6453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6454#[inline]
6455#[target_feature(enable = "avx512f,avx512vl")]
6456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6457#[cfg_attr(test, assert_instr(vscalefpd))]
6458pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6459 unsafe { transmute(src:vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), mask:k)) }
6460}
6461
6462/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6463///
6464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6465#[inline]
6466#[target_feature(enable = "avx512f,avx512vl")]
6467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6468#[cfg_attr(test, assert_instr(vscalefpd))]
6469pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6470 unsafe { transmute(src:vscalefpd256(a.as_f64x4(), b.as_f64x4(), src:f64x4::ZERO, mask:k)) }
6471}
6472
6473/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6474///
6475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6476#[inline]
6477#[target_feature(enable = "avx512f,avx512vl")]
6478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6479#[cfg_attr(test, assert_instr(vscalefpd))]
6480pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6481 unsafe {
6482 transmute(src:vscalefpd128(
6483 a.as_f64x2(),
6484 b.as_f64x2(),
6485 src:f64x2::ZERO,
6486 mask:0b00000011,
6487 ))
6488 }
6489}
6490
6491/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6492///
6493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6494#[inline]
6495#[target_feature(enable = "avx512f,avx512vl")]
6496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6497#[cfg_attr(test, assert_instr(vscalefpd))]
6498pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6499 unsafe { transmute(src:vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), mask:k)) }
6500}
6501
6502/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6503///
6504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6505#[inline]
6506#[target_feature(enable = "avx512f,avx512vl")]
6507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6508#[cfg_attr(test, assert_instr(vscalefpd))]
6509pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6510 unsafe { transmute(src:vscalefpd128(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:k)) }
6511}
6512
6513/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6514///
6515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6516#[inline]
6517#[target_feature(enable = "avx512f")]
6518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6519#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6520#[rustc_legacy_const_generics(3)]
6521pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6522 unsafe {
6523 static_assert_uimm_bits!(IMM8, 8);
6524 let a: Simd = a.as_f32x16();
6525 let b: Simd = b.as_f32x16();
6526 let c: Simd = c.as_i32x16();
6527 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6528 transmute(src:r)
6529 }
6530}
6531
6532/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6533///
6534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6535#[inline]
6536#[target_feature(enable = "avx512f")]
6537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6538#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6539#[rustc_legacy_const_generics(4)]
6540pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6541 a: __m512,
6542 k: __mmask16,
6543 b: __m512,
6544 c: __m512i,
6545) -> __m512 {
6546 unsafe {
6547 static_assert_uimm_bits!(IMM8, 8);
6548 let a: Simd = a.as_f32x16();
6549 let b: Simd = b.as_f32x16();
6550 let c: Simd = c.as_i32x16();
6551 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6552 transmute(src:r)
6553 }
6554}
6555
6556/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6557///
6558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6559#[inline]
6560#[target_feature(enable = "avx512f")]
6561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6562#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6563#[rustc_legacy_const_generics(4)]
6564pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6565 k: __mmask16,
6566 a: __m512,
6567 b: __m512,
6568 c: __m512i,
6569) -> __m512 {
6570 unsafe {
6571 static_assert_uimm_bits!(IMM8, 8);
6572 let a: Simd = a.as_f32x16();
6573 let b: Simd = b.as_f32x16();
6574 let c: Simd = c.as_i32x16();
6575 let r: Simd = vfixupimmpsz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6576 transmute(src:r)
6577 }
6578}
6579
6580/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6581///
6582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6583#[inline]
6584#[target_feature(enable = "avx512f,avx512vl")]
6585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6586#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6587#[rustc_legacy_const_generics(3)]
6588pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6589 unsafe {
6590 static_assert_uimm_bits!(IMM8, 8);
6591 let a: Simd = a.as_f32x8();
6592 let b: Simd = b.as_f32x8();
6593 let c: Simd = c.as_i32x8();
6594 let r: Simd = vfixupimmps256(a, b, c, IMM8, mask:0b11111111);
6595 transmute(src:r)
6596 }
6597}
6598
6599/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6600///
6601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6602#[inline]
6603#[target_feature(enable = "avx512f,avx512vl")]
6604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6605#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6606#[rustc_legacy_const_generics(4)]
6607pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6608 a: __m256,
6609 k: __mmask8,
6610 b: __m256,
6611 c: __m256i,
6612) -> __m256 {
6613 unsafe {
6614 static_assert_uimm_bits!(IMM8, 8);
6615 let a: Simd = a.as_f32x8();
6616 let b: Simd = b.as_f32x8();
6617 let c: Simd = c.as_i32x8();
6618 let r: Simd = vfixupimmps256(a, b, c, IMM8, mask:k);
6619 transmute(src:r)
6620 }
6621}
6622
6623/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6624///
6625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6626#[inline]
6627#[target_feature(enable = "avx512f,avx512vl")]
6628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6629#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6630#[rustc_legacy_const_generics(4)]
6631pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6632 k: __mmask8,
6633 a: __m256,
6634 b: __m256,
6635 c: __m256i,
6636) -> __m256 {
6637 unsafe {
6638 static_assert_uimm_bits!(IMM8, 8);
6639 let a: Simd = a.as_f32x8();
6640 let b: Simd = b.as_f32x8();
6641 let c: Simd = c.as_i32x8();
6642 let r: Simd = vfixupimmpsz256(a, b, c, IMM8, mask:k);
6643 transmute(src:r)
6644 }
6645}
6646
6647/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6648///
6649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6650#[inline]
6651#[target_feature(enable = "avx512f,avx512vl")]
6652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6653#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6654#[rustc_legacy_const_generics(3)]
6655pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6656 unsafe {
6657 static_assert_uimm_bits!(IMM8, 8);
6658 let a: Simd = a.as_f32x4();
6659 let b: Simd = b.as_f32x4();
6660 let c: Simd = c.as_i32x4();
6661 let r: Simd = vfixupimmps128(a, b, c, IMM8, mask:0b00001111);
6662 transmute(src:r)
6663 }
6664}
6665
6666/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6667///
6668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6669#[inline]
6670#[target_feature(enable = "avx512f,avx512vl")]
6671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6672#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6673#[rustc_legacy_const_generics(4)]
6674pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6675 a: __m128,
6676 k: __mmask8,
6677 b: __m128,
6678 c: __m128i,
6679) -> __m128 {
6680 unsafe {
6681 static_assert_uimm_bits!(IMM8, 8);
6682 let a: Simd = a.as_f32x4();
6683 let b: Simd = b.as_f32x4();
6684 let c: Simd = c.as_i32x4();
6685 let r: Simd = vfixupimmps128(a, b, c, IMM8, mask:k);
6686 transmute(src:r)
6687 }
6688}
6689
6690/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6691///
6692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6693#[inline]
6694#[target_feature(enable = "avx512f,avx512vl")]
6695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6696#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6697#[rustc_legacy_const_generics(4)]
6698pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6699 k: __mmask8,
6700 a: __m128,
6701 b: __m128,
6702 c: __m128i,
6703) -> __m128 {
6704 unsafe {
6705 static_assert_uimm_bits!(IMM8, 8);
6706 let a: Simd = a.as_f32x4();
6707 let b: Simd = b.as_f32x4();
6708 let c: Simd = c.as_i32x4();
6709 let r: Simd = vfixupimmpsz128(a, b, c, IMM8, mask:k);
6710 transmute(src:r)
6711 }
6712}
6713
6714/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6715///
6716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6717#[inline]
6718#[target_feature(enable = "avx512f")]
6719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6720#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6721#[rustc_legacy_const_generics(3)]
6722pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6723 unsafe {
6724 static_assert_uimm_bits!(IMM8, 8);
6725 let a: Simd = a.as_f64x8();
6726 let b: Simd = b.as_f64x8();
6727 let c: Simd = c.as_i64x8();
6728 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
6729 transmute(src:r)
6730 }
6731}
6732
6733/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6734///
6735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6736#[inline]
6737#[target_feature(enable = "avx512f")]
6738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6739#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6740#[rustc_legacy_const_generics(4)]
6741pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6742 a: __m512d,
6743 k: __mmask8,
6744 b: __m512d,
6745 c: __m512i,
6746) -> __m512d {
6747 unsafe {
6748 static_assert_uimm_bits!(IMM8, 8);
6749 let a: Simd = a.as_f64x8();
6750 let b: Simd = b.as_f64x8();
6751 let c: Simd = c.as_i64x8();
6752 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6753 transmute(src:r)
6754 }
6755}
6756
6757/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6758///
6759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6760#[inline]
6761#[target_feature(enable = "avx512f")]
6762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6763#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6764#[rustc_legacy_const_generics(4)]
6765pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6766 k: __mmask8,
6767 a: __m512d,
6768 b: __m512d,
6769 c: __m512i,
6770) -> __m512d {
6771 unsafe {
6772 static_assert_uimm_bits!(IMM8, 8);
6773 let a: Simd = a.as_f64x8();
6774 let b: Simd = b.as_f64x8();
6775 let c: Simd = c.as_i64x8();
6776 let r: Simd = vfixupimmpdz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6777 transmute(src:r)
6778 }
6779}
6780
6781/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6782///
6783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6784#[inline]
6785#[target_feature(enable = "avx512f,avx512vl")]
6786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6787#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6788#[rustc_legacy_const_generics(3)]
6789pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6790 unsafe {
6791 static_assert_uimm_bits!(IMM8, 8);
6792 let a: Simd = a.as_f64x4();
6793 let b: Simd = b.as_f64x4();
6794 let c: Simd = c.as_i64x4();
6795 let r: Simd = vfixupimmpd256(a, b, c, IMM8, mask:0b00001111);
6796 transmute(src:r)
6797 }
6798}
6799
6800/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6801///
6802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6803#[inline]
6804#[target_feature(enable = "avx512f,avx512vl")]
6805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6806#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6807#[rustc_legacy_const_generics(4)]
6808pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6809 a: __m256d,
6810 k: __mmask8,
6811 b: __m256d,
6812 c: __m256i,
6813) -> __m256d {
6814 unsafe {
6815 static_assert_uimm_bits!(IMM8, 8);
6816 let a: Simd = a.as_f64x4();
6817 let b: Simd = b.as_f64x4();
6818 let c: Simd = c.as_i64x4();
6819 let r: Simd = vfixupimmpd256(a, b, c, IMM8, mask:k);
6820 transmute(src:r)
6821 }
6822}
6823
6824/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6825///
6826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6827#[inline]
6828#[target_feature(enable = "avx512f,avx512vl")]
6829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6830#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6831#[rustc_legacy_const_generics(4)]
6832pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6833 k: __mmask8,
6834 a: __m256d,
6835 b: __m256d,
6836 c: __m256i,
6837) -> __m256d {
6838 unsafe {
6839 static_assert_uimm_bits!(IMM8, 8);
6840 let a: Simd = a.as_f64x4();
6841 let b: Simd = b.as_f64x4();
6842 let c: Simd = c.as_i64x4();
6843 let r: Simd = vfixupimmpdz256(a, b, c, IMM8, mask:k);
6844 transmute(src:r)
6845 }
6846}
6847
6848/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6849///
6850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6851#[inline]
6852#[target_feature(enable = "avx512f,avx512vl")]
6853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6854#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6855#[rustc_legacy_const_generics(3)]
6856pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6857 unsafe {
6858 static_assert_uimm_bits!(IMM8, 8);
6859 let a: Simd = a.as_f64x2();
6860 let b: Simd = b.as_f64x2();
6861 let c: Simd = c.as_i64x2();
6862 let r: Simd = vfixupimmpd128(a, b, c, IMM8, mask:0b00000011);
6863 transmute(src:r)
6864 }
6865}
6866
6867/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6868///
6869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6870#[inline]
6871#[target_feature(enable = "avx512f,avx512vl")]
6872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6873#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6874#[rustc_legacy_const_generics(4)]
6875pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6876 a: __m128d,
6877 k: __mmask8,
6878 b: __m128d,
6879 c: __m128i,
6880) -> __m128d {
6881 unsafe {
6882 static_assert_uimm_bits!(IMM8, 8);
6883 let a: Simd = a.as_f64x2();
6884 let b: Simd = b.as_f64x2();
6885 let c: Simd = c.as_i64x2();
6886 let r: Simd = vfixupimmpd128(a, b, c, IMM8, mask:k);
6887 transmute(src:r)
6888 }
6889}
6890
6891/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6892///
6893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6894#[inline]
6895#[target_feature(enable = "avx512f,avx512vl")]
6896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6897#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6898#[rustc_legacy_const_generics(4)]
6899pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6900 k: __mmask8,
6901 a: __m128d,
6902 b: __m128d,
6903 c: __m128i,
6904) -> __m128d {
6905 unsafe {
6906 static_assert_uimm_bits!(IMM8, 8);
6907 let a: Simd = a.as_f64x2();
6908 let b: Simd = b.as_f64x2();
6909 let c: Simd = c.as_i64x2();
6910 let r: Simd = vfixupimmpdz128(a, b, c, IMM8, mask:k);
6911 transmute(src:r)
6912 }
6913}
6914
6915/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6916///
6917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6918#[inline]
6919#[target_feature(enable = "avx512f")]
6920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6921#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6922#[rustc_legacy_const_generics(3)]
6923pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6924 unsafe {
6925 static_assert_uimm_bits!(IMM8, 8);
6926 let a: Simd = a.as_i32x16();
6927 let b: Simd = b.as_i32x16();
6928 let c: Simd = c.as_i32x16();
6929 let r: Simd = vpternlogd(a, b, c, IMM8);
6930 transmute(src:r)
6931 }
6932}
6933
6934/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6935///
6936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6937#[inline]
6938#[target_feature(enable = "avx512f")]
6939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6940#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6941#[rustc_legacy_const_generics(4)]
6942pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6943 src: __m512i,
6944 k: __mmask16,
6945 a: __m512i,
6946 b: __m512i,
6947) -> __m512i {
6948 unsafe {
6949 static_assert_uimm_bits!(IMM8, 8);
6950 let src: Simd = src.as_i32x16();
6951 let a: Simd = a.as_i32x16();
6952 let b: Simd = b.as_i32x16();
6953 let r: Simd = vpternlogd(a:src, b:a, c:b, IMM8);
6954 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
6955 }
6956}
6957
6958/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6959///
6960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6961#[inline]
6962#[target_feature(enable = "avx512f")]
6963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6964#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6965#[rustc_legacy_const_generics(4)]
6966pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6967 k: __mmask16,
6968 a: __m512i,
6969 b: __m512i,
6970 c: __m512i,
6971) -> __m512i {
6972 unsafe {
6973 static_assert_uimm_bits!(IMM8, 8);
6974 let a: Simd = a.as_i32x16();
6975 let b: Simd = b.as_i32x16();
6976 let c: Simd = c.as_i32x16();
6977 let r: Simd = vpternlogd(a, b, c, IMM8);
6978 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
6979 }
6980}
6981
6982/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6983///
6984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6985#[inline]
6986#[target_feature(enable = "avx512f,avx512vl")]
6987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6988#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6989#[rustc_legacy_const_generics(3)]
6990pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6991 unsafe {
6992 static_assert_uimm_bits!(IMM8, 8);
6993 let a: Simd = a.as_i32x8();
6994 let b: Simd = b.as_i32x8();
6995 let c: Simd = c.as_i32x8();
6996 let r: Simd = vpternlogd256(a, b, c, IMM8);
6997 transmute(src:r)
6998 }
6999}
7000
7001/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7002///
7003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
7004#[inline]
7005#[target_feature(enable = "avx512f,avx512vl")]
7006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7007#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7008#[rustc_legacy_const_generics(4)]
7009pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
7010 src: __m256i,
7011 k: __mmask8,
7012 a: __m256i,
7013 b: __m256i,
7014) -> __m256i {
7015 unsafe {
7016 static_assert_uimm_bits!(IMM8, 8);
7017 let src: Simd = src.as_i32x8();
7018 let a: Simd = a.as_i32x8();
7019 let b: Simd = b.as_i32x8();
7020 let r: Simd = vpternlogd256(a:src, b:a, c:b, IMM8);
7021 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7022 }
7023}
7024
7025/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7026///
7027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
7028#[inline]
7029#[target_feature(enable = "avx512f,avx512vl")]
7030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7031#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7032#[rustc_legacy_const_generics(4)]
7033pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
7034 k: __mmask8,
7035 a: __m256i,
7036 b: __m256i,
7037 c: __m256i,
7038) -> __m256i {
7039 unsafe {
7040 static_assert_uimm_bits!(IMM8, 8);
7041 let a: Simd = a.as_i32x8();
7042 let b: Simd = b.as_i32x8();
7043 let c: Simd = c.as_i32x8();
7044 let r: Simd = vpternlogd256(a, b, c, IMM8);
7045 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
7046 }
7047}
7048
7049/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7050///
7051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
7052#[inline]
7053#[target_feature(enable = "avx512f,avx512vl")]
7054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7055#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7056#[rustc_legacy_const_generics(3)]
7057pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7058 unsafe {
7059 static_assert_uimm_bits!(IMM8, 8);
7060 let a: Simd = a.as_i32x4();
7061 let b: Simd = b.as_i32x4();
7062 let c: Simd = c.as_i32x4();
7063 let r: Simd = vpternlogd128(a, b, c, IMM8);
7064 transmute(src:r)
7065 }
7066}
7067
7068/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7069///
7070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
7071#[inline]
7072#[target_feature(enable = "avx512f,avx512vl")]
7073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7074#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7075#[rustc_legacy_const_generics(4)]
7076pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
7077 src: __m128i,
7078 k: __mmask8,
7079 a: __m128i,
7080 b: __m128i,
7081) -> __m128i {
7082 unsafe {
7083 static_assert_uimm_bits!(IMM8, 8);
7084 let src: Simd = src.as_i32x4();
7085 let a: Simd = a.as_i32x4();
7086 let b: Simd = b.as_i32x4();
7087 let r: Simd = vpternlogd128(a:src, b:a, c:b, IMM8);
7088 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7089 }
7090}
7091
7092/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7093///
7094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
7095#[inline]
7096#[target_feature(enable = "avx512f,avx512vl")]
7097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7098#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7099#[rustc_legacy_const_generics(4)]
7100pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
7101 k: __mmask8,
7102 a: __m128i,
7103 b: __m128i,
7104 c: __m128i,
7105) -> __m128i {
7106 unsafe {
7107 static_assert_uimm_bits!(IMM8, 8);
7108 let a: Simd = a.as_i32x4();
7109 let b: Simd = b.as_i32x4();
7110 let c: Simd = c.as_i32x4();
7111 let r: Simd = vpternlogd128(a, b, c, IMM8);
7112 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
7113 }
7114}
7115
7116/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7122#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7123#[rustc_legacy_const_generics(3)]
7124pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
7125 unsafe {
7126 static_assert_uimm_bits!(IMM8, 8);
7127 let a: Simd = a.as_i64x8();
7128 let b: Simd = b.as_i64x8();
7129 let c: Simd = c.as_i64x8();
7130 let r: Simd = vpternlogq(a, b, c, IMM8);
7131 transmute(src:r)
7132 }
7133}
7134
7135/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7136///
7137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
7138#[inline]
7139#[target_feature(enable = "avx512f")]
7140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7141#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7142#[rustc_legacy_const_generics(4)]
7143pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
7144 src: __m512i,
7145 k: __mmask8,
7146 a: __m512i,
7147 b: __m512i,
7148) -> __m512i {
7149 unsafe {
7150 static_assert_uimm_bits!(IMM8, 8);
7151 let src: Simd = src.as_i64x8();
7152 let a: Simd = a.as_i64x8();
7153 let b: Simd = b.as_i64x8();
7154 let r: Simd = vpternlogq(a:src, b:a, c:b, IMM8);
7155 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7156 }
7157}
7158
7159/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7160///
7161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
7162#[inline]
7163#[target_feature(enable = "avx512f")]
7164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7165#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7166#[rustc_legacy_const_generics(4)]
7167pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
7168 k: __mmask8,
7169 a: __m512i,
7170 b: __m512i,
7171 c: __m512i,
7172) -> __m512i {
7173 unsafe {
7174 static_assert_uimm_bits!(IMM8, 8);
7175 let a: Simd = a.as_i64x8();
7176 let b: Simd = b.as_i64x8();
7177 let c: Simd = c.as_i64x8();
7178 let r: Simd = vpternlogq(a, b, c, IMM8);
7179 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x8::ZERO))
7180 }
7181}
7182
7183/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7184///
7185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
7186#[inline]
7187#[target_feature(enable = "avx512f,avx512vl")]
7188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7189#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7190#[rustc_legacy_const_generics(3)]
7191pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
7192 unsafe {
7193 static_assert_uimm_bits!(IMM8, 8);
7194 let a: Simd = a.as_i64x4();
7195 let b: Simd = b.as_i64x4();
7196 let c: Simd = c.as_i64x4();
7197 let r: Simd = vpternlogq256(a, b, c, IMM8);
7198 transmute(src:r)
7199 }
7200}
7201
7202/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7203///
7204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
7205#[inline]
7206#[target_feature(enable = "avx512f,avx512vl")]
7207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7208#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7209#[rustc_legacy_const_generics(4)]
7210pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
7211 src: __m256i,
7212 k: __mmask8,
7213 a: __m256i,
7214 b: __m256i,
7215) -> __m256i {
7216 unsafe {
7217 static_assert_uimm_bits!(IMM8, 8);
7218 let src: Simd = src.as_i64x4();
7219 let a: Simd = a.as_i64x4();
7220 let b: Simd = b.as_i64x4();
7221 let r: Simd = vpternlogq256(a:src, b:a, c:b, IMM8);
7222 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7223 }
7224}
7225
7226/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7227///
7228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
7229#[inline]
7230#[target_feature(enable = "avx512f,avx512vl")]
7231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7232#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7233#[rustc_legacy_const_generics(4)]
7234pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
7235 k: __mmask8,
7236 a: __m256i,
7237 b: __m256i,
7238 c: __m256i,
7239) -> __m256i {
7240 unsafe {
7241 static_assert_uimm_bits!(IMM8, 8);
7242 let a: Simd = a.as_i64x4();
7243 let b: Simd = b.as_i64x4();
7244 let c: Simd = c.as_i64x4();
7245 let r: Simd = vpternlogq256(a, b, c, IMM8);
7246 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x4::ZERO))
7247 }
7248}
7249
7250/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7251///
7252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
7253#[inline]
7254#[target_feature(enable = "avx512f,avx512vl")]
7255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7256#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7257#[rustc_legacy_const_generics(3)]
7258pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7259 unsafe {
7260 static_assert_uimm_bits!(IMM8, 8);
7261 let a: Simd = a.as_i64x2();
7262 let b: Simd = b.as_i64x2();
7263 let c: Simd = c.as_i64x2();
7264 let r: Simd = vpternlogq128(a, b, c, IMM8);
7265 transmute(src:r)
7266 }
7267}
7268
7269/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7270///
7271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
7272#[inline]
7273#[target_feature(enable = "avx512f,avx512vl")]
7274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7275#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7276#[rustc_legacy_const_generics(4)]
7277pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
7278 src: __m128i,
7279 k: __mmask8,
7280 a: __m128i,
7281 b: __m128i,
7282) -> __m128i {
7283 unsafe {
7284 static_assert_uimm_bits!(IMM8, 8);
7285 let src: Simd = src.as_i64x2();
7286 let a: Simd = a.as_i64x2();
7287 let b: Simd = b.as_i64x2();
7288 let r: Simd = vpternlogq128(a:src, b:a, c:b, IMM8);
7289 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7290 }
7291}
7292
7293/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7294///
7295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7296#[inline]
7297#[target_feature(enable = "avx512f,avx512vl")]
7298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7299#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7300#[rustc_legacy_const_generics(4)]
7301pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7302 k: __mmask8,
7303 a: __m128i,
7304 b: __m128i,
7305 c: __m128i,
7306) -> __m128i {
7307 unsafe {
7308 static_assert_uimm_bits!(IMM8, 8);
7309 let a: Simd = a.as_i64x2();
7310 let b: Simd = b.as_i64x2();
7311 let c: Simd = c.as_i64x2();
7312 let r: Simd = vpternlogq128(a, b, c, IMM8);
7313 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x2::ZERO))
7314 }
7315}
7316
7317/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7318/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7319/// _MM_MANT_NORM_1_2 // interval [1, 2)
7320/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7321/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7322/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7323/// The sign is determined by sc which can take the following values:
7324/// _MM_MANT_SIGN_src // sign = sign(src)
7325/// _MM_MANT_SIGN_zero // sign = 0
7326/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7327///
7328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
7329#[inline]
7330#[target_feature(enable = "avx512f")]
7331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7332#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7333#[rustc_legacy_const_generics(1, 2)]
7334pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7335 a: __m512,
7336) -> __m512 {
7337 unsafe {
7338 static_assert_uimm_bits!(NORM, 4);
7339 static_assert_uimm_bits!(SIGN, 2);
7340 let a: Simd = a.as_f32x16();
7341 let zero: Simd = f32x16::ZERO;
7342 let r: Simd = vgetmantps(
7343 a,
7344 SIGN << 2 | NORM,
7345 src:zero,
7346 m:0b11111111_11111111,
7347 _MM_FROUND_CUR_DIRECTION,
7348 );
7349 transmute(src:r)
7350 }
7351}
7352
7353/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7355/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7356/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7357/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7358/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7359/// The sign is determined by sc which can take the following values:\
7360/// _MM_MANT_SIGN_src // sign = sign(src)\
7361/// _MM_MANT_SIGN_zero // sign = 0\
7362/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7363///
7364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7365#[inline]
7366#[target_feature(enable = "avx512f")]
7367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7368#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7369#[rustc_legacy_const_generics(3, 4)]
7370pub fn _mm512_mask_getmant_ps<
7371 const NORM: _MM_MANTISSA_NORM_ENUM,
7372 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7373>(
7374 src: __m512,
7375 k: __mmask16,
7376 a: __m512,
7377) -> __m512 {
7378 unsafe {
7379 static_assert_uimm_bits!(NORM, 4);
7380 static_assert_uimm_bits!(SIGN, 2);
7381 let a: Simd = a.as_f32x16();
7382 let src: Simd = src.as_f32x16();
7383 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7384 transmute(src:r)
7385 }
7386}
7387
7388/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7389/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7390/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7391/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7392/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7393/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7394/// The sign is determined by sc which can take the following values:\
7395/// _MM_MANT_SIGN_src // sign = sign(src)\
7396/// _MM_MANT_SIGN_zero // sign = 0\
7397/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7398///
7399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7400#[inline]
7401#[target_feature(enable = "avx512f")]
7402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7403#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7404#[rustc_legacy_const_generics(2, 3)]
7405pub fn _mm512_maskz_getmant_ps<
7406 const NORM: _MM_MANTISSA_NORM_ENUM,
7407 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7408>(
7409 k: __mmask16,
7410 a: __m512,
7411) -> __m512 {
7412 unsafe {
7413 static_assert_uimm_bits!(NORM, 4);
7414 static_assert_uimm_bits!(SIGN, 2);
7415 let a: Simd = a.as_f32x16();
7416 let r: Simd = vgetmantps(
7417 a,
7418 SIGN << 2 | NORM,
7419 src:f32x16::ZERO,
7420 m:k,
7421 _MM_FROUND_CUR_DIRECTION,
7422 );
7423 transmute(src:r)
7424 }
7425}
7426
7427/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7428/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7429/// _MM_MANT_NORM_1_2 // interval [1, 2)
7430/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7431/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7432/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7433/// The sign is determined by sc which can take the following values:
7434/// _MM_MANT_SIGN_src // sign = sign(src)
7435/// _MM_MANT_SIGN_zero // sign = 0
7436/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7437///
7438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7439#[inline]
7440#[target_feature(enable = "avx512f,avx512vl")]
7441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7442#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7443#[rustc_legacy_const_generics(1, 2)]
7444pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7445 a: __m256,
7446) -> __m256 {
7447 unsafe {
7448 static_assert_uimm_bits!(NORM, 4);
7449 static_assert_uimm_bits!(SIGN, 2);
7450 let a: Simd = a.as_f32x8();
7451 let r: Simd = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:0b11111111);
7452 transmute(src:r)
7453 }
7454}
7455
7456/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7457/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7458/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7459/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7460/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7461/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7462/// The sign is determined by sc which can take the following values:\
7463/// _MM_MANT_SIGN_src // sign = sign(src)\
7464/// _MM_MANT_SIGN_zero // sign = 0\
7465/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7466///
7467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7468#[inline]
7469#[target_feature(enable = "avx512f,avx512vl")]
7470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7471#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7472#[rustc_legacy_const_generics(3, 4)]
7473pub fn _mm256_mask_getmant_ps<
7474 const NORM: _MM_MANTISSA_NORM_ENUM,
7475 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7476>(
7477 src: __m256,
7478 k: __mmask8,
7479 a: __m256,
7480) -> __m256 {
7481 unsafe {
7482 static_assert_uimm_bits!(NORM, 4);
7483 static_assert_uimm_bits!(SIGN, 2);
7484 let a: Simd = a.as_f32x8();
7485 let src: Simd = src.as_f32x8();
7486 let r: Simd = vgetmantps256(a, SIGN << 2 | NORM, src, m:k);
7487 transmute(src:r)
7488 }
7489}
7490
7491/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7492/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7493/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7494/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7495/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7496/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7497/// The sign is determined by sc which can take the following values:\
7498/// _MM_MANT_SIGN_src // sign = sign(src)\
7499/// _MM_MANT_SIGN_zero // sign = 0\
7500/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7501///
7502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7503#[inline]
7504#[target_feature(enable = "avx512f,avx512vl")]
7505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7506#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7507#[rustc_legacy_const_generics(2, 3)]
7508pub fn _mm256_maskz_getmant_ps<
7509 const NORM: _MM_MANTISSA_NORM_ENUM,
7510 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7511>(
7512 k: __mmask8,
7513 a: __m256,
7514) -> __m256 {
7515 unsafe {
7516 static_assert_uimm_bits!(NORM, 4);
7517 static_assert_uimm_bits!(SIGN, 2);
7518 let a: Simd = a.as_f32x8();
7519 let r: Simd = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:k);
7520 transmute(src:r)
7521 }
7522}
7523
7524/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7525/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7526/// _MM_MANT_NORM_1_2 // interval [1, 2)
7527/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7528/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7529/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7530/// The sign is determined by sc which can take the following values:
7531/// _MM_MANT_SIGN_src // sign = sign(src)
7532/// _MM_MANT_SIGN_zero // sign = 0
7533/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7534///
7535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7536#[inline]
7537#[target_feature(enable = "avx512f,avx512vl")]
7538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7539#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7540#[rustc_legacy_const_generics(1, 2)]
7541pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7542 a: __m128,
7543) -> __m128 {
7544 unsafe {
7545 static_assert_uimm_bits!(NORM, 4);
7546 static_assert_uimm_bits!(SIGN, 2);
7547 let a: Simd = a.as_f32x4();
7548 let r: Simd = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:0b00001111);
7549 transmute(src:r)
7550 }
7551}
7552
7553/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7554/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7555/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7556/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7557/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7558/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7559/// The sign is determined by sc which can take the following values:\
7560/// _MM_MANT_SIGN_src // sign = sign(src)\
7561/// _MM_MANT_SIGN_zero // sign = 0\
7562/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7563///
7564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7565#[inline]
7566#[target_feature(enable = "avx512f,avx512vl")]
7567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7568#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7569#[rustc_legacy_const_generics(3, 4)]
7570pub fn _mm_mask_getmant_ps<
7571 const NORM: _MM_MANTISSA_NORM_ENUM,
7572 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7573>(
7574 src: __m128,
7575 k: __mmask8,
7576 a: __m128,
7577) -> __m128 {
7578 unsafe {
7579 static_assert_uimm_bits!(NORM, 4);
7580 static_assert_uimm_bits!(SIGN, 2);
7581 let a: Simd = a.as_f32x4();
7582 let src: Simd = src.as_f32x4();
7583 let r: Simd = vgetmantps128(a, SIGN << 2 | NORM, src, m:k);
7584 transmute(src:r)
7585 }
7586}
7587
7588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7590/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7591/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7592/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7593/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7594/// The sign is determined by sc which can take the following values:\
7595/// _MM_MANT_SIGN_src // sign = sign(src)\
7596/// _MM_MANT_SIGN_zero // sign = 0\
7597/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7598///
7599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7600#[inline]
7601#[target_feature(enable = "avx512f,avx512vl")]
7602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7603#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7604#[rustc_legacy_const_generics(2, 3)]
7605pub fn _mm_maskz_getmant_ps<
7606 const NORM: _MM_MANTISSA_NORM_ENUM,
7607 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7608>(
7609 k: __mmask8,
7610 a: __m128,
7611) -> __m128 {
7612 unsafe {
7613 static_assert_uimm_bits!(NORM, 4);
7614 static_assert_uimm_bits!(SIGN, 2);
7615 let a: Simd = a.as_f32x4();
7616 let r: Simd = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:k);
7617 transmute(src:r)
7618 }
7619}
7620
7621/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7622/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7623/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7624/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7625/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7626/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7627/// The sign is determined by sc which can take the following values:\
7628/// _MM_MANT_SIGN_src // sign = sign(src)\
7629/// _MM_MANT_SIGN_zero // sign = 0\
7630/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7631///
7632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7633#[inline]
7634#[target_feature(enable = "avx512f")]
7635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7636#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7637#[rustc_legacy_const_generics(1, 2)]
7638pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7639 a: __m512d,
7640) -> __m512d {
7641 unsafe {
7642 static_assert_uimm_bits!(NORM, 4);
7643 static_assert_uimm_bits!(SIGN, 2);
7644 let a: Simd = a.as_f64x8();
7645 let zero: Simd = f64x8::ZERO;
7646 let r: Simd = vgetmantpd(
7647 a,
7648 SIGN << 2 | NORM,
7649 src:zero,
7650 m:0b11111111,
7651 _MM_FROUND_CUR_DIRECTION,
7652 );
7653 transmute(src:r)
7654 }
7655}
7656
7657/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7658/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7659/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7660/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7661/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7662/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7663/// The sign is determined by sc which can take the following values:\
7664/// _MM_MANT_SIGN_src // sign = sign(src)\
7665/// _MM_MANT_SIGN_zero // sign = 0\
7666/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7667///
7668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7669#[inline]
7670#[target_feature(enable = "avx512f")]
7671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7672#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7673#[rustc_legacy_const_generics(3, 4)]
7674pub fn _mm512_mask_getmant_pd<
7675 const NORM: _MM_MANTISSA_NORM_ENUM,
7676 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7677>(
7678 src: __m512d,
7679 k: __mmask8,
7680 a: __m512d,
7681) -> __m512d {
7682 unsafe {
7683 static_assert_uimm_bits!(NORM, 4);
7684 static_assert_uimm_bits!(SIGN, 2);
7685 let a: Simd = a.as_f64x8();
7686 let src: Simd = src.as_f64x8();
7687 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7688 transmute(src:r)
7689 }
7690}
7691
7692/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7693/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7694/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7695/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7696/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7697/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7698/// The sign is determined by sc which can take the following values:\
7699/// _MM_MANT_SIGN_src // sign = sign(src)\
7700/// _MM_MANT_SIGN_zero // sign = 0\
7701/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7702///
7703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7704#[inline]
7705#[target_feature(enable = "avx512f")]
7706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7707#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7708#[rustc_legacy_const_generics(2, 3)]
7709pub fn _mm512_maskz_getmant_pd<
7710 const NORM: _MM_MANTISSA_NORM_ENUM,
7711 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7712>(
7713 k: __mmask8,
7714 a: __m512d,
7715) -> __m512d {
7716 unsafe {
7717 static_assert_uimm_bits!(NORM, 4);
7718 static_assert_uimm_bits!(SIGN, 2);
7719 let a: Simd = a.as_f64x8();
7720 let r: Simd = vgetmantpd(
7721 a,
7722 SIGN << 2 | NORM,
7723 src:f64x8::ZERO,
7724 m:k,
7725 _MM_FROUND_CUR_DIRECTION,
7726 );
7727 transmute(src:r)
7728 }
7729}
7730
7731/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7732/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7733/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7734/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7735/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7736/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7737/// The sign is determined by sc which can take the following values:\
7738/// _MM_MANT_SIGN_src // sign = sign(src)\
7739/// _MM_MANT_SIGN_zero // sign = 0\
7740/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7741///
7742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7743#[inline]
7744#[target_feature(enable = "avx512f,avx512vl")]
7745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7746#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7747#[rustc_legacy_const_generics(1, 2)]
7748pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7749 a: __m256d,
7750) -> __m256d {
7751 unsafe {
7752 static_assert_uimm_bits!(NORM, 4);
7753 static_assert_uimm_bits!(SIGN, 2);
7754 let a: Simd = a.as_f64x4();
7755 let r: Simd = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:0b00001111);
7756 transmute(src:r)
7757 }
7758}
7759
7760/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7761/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7762/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7763/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7764/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7765/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7766/// The sign is determined by sc which can take the following values:\
7767/// _MM_MANT_SIGN_src // sign = sign(src)\
7768/// _MM_MANT_SIGN_zero // sign = 0\
7769/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7770///
7771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7772#[inline]
7773#[target_feature(enable = "avx512f,avx512vl")]
7774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7775#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7776#[rustc_legacy_const_generics(3, 4)]
7777pub fn _mm256_mask_getmant_pd<
7778 const NORM: _MM_MANTISSA_NORM_ENUM,
7779 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7780>(
7781 src: __m256d,
7782 k: __mmask8,
7783 a: __m256d,
7784) -> __m256d {
7785 unsafe {
7786 static_assert_uimm_bits!(NORM, 4);
7787 static_assert_uimm_bits!(SIGN, 2);
7788 let a: Simd = a.as_f64x4();
7789 let src: Simd = src.as_f64x4();
7790 let r: Simd = vgetmantpd256(a, SIGN << 2 | NORM, src, m:k);
7791 transmute(src:r)
7792 }
7793}
7794
7795/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7796/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7797/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7798/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7799/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7800/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7801/// The sign is determined by sc which can take the following values:\
7802/// _MM_MANT_SIGN_src // sign = sign(src)\
7803/// _MM_MANT_SIGN_zero // sign = 0\
7804/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7805///
7806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7807#[inline]
7808#[target_feature(enable = "avx512f,avx512vl")]
7809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7810#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7811#[rustc_legacy_const_generics(2, 3)]
7812pub fn _mm256_maskz_getmant_pd<
7813 const NORM: _MM_MANTISSA_NORM_ENUM,
7814 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7815>(
7816 k: __mmask8,
7817 a: __m256d,
7818) -> __m256d {
7819 unsafe {
7820 static_assert_uimm_bits!(NORM, 4);
7821 static_assert_uimm_bits!(SIGN, 2);
7822 let a: Simd = a.as_f64x4();
7823 let r: Simd = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:k);
7824 transmute(src:r)
7825 }
7826}
7827
7828/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7829/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7830/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7831/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7832/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7833/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7834/// The sign is determined by sc which can take the following values:\
7835/// _MM_MANT_SIGN_src // sign = sign(src)\
7836/// _MM_MANT_SIGN_zero // sign = 0\
7837/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7838///
7839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7840#[inline]
7841#[target_feature(enable = "avx512f,avx512vl")]
7842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7843#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7844#[rustc_legacy_const_generics(1, 2)]
7845pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7846 a: __m128d,
7847) -> __m128d {
7848 unsafe {
7849 static_assert_uimm_bits!(NORM, 4);
7850 static_assert_uimm_bits!(SIGN, 2);
7851 let a: Simd = a.as_f64x2();
7852 let r: Simd = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:0b00000011);
7853 transmute(src:r)
7854 }
7855}
7856
7857/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7858/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7859/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7860/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7861/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7862/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7863/// The sign is determined by sc which can take the following values:\
7864/// _MM_MANT_SIGN_src // sign = sign(src)\
7865/// _MM_MANT_SIGN_zero // sign = 0\
7866/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7867///
7868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7869#[inline]
7870#[target_feature(enable = "avx512f,avx512vl")]
7871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7872#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7873#[rustc_legacy_const_generics(3, 4)]
7874pub fn _mm_mask_getmant_pd<
7875 const NORM: _MM_MANTISSA_NORM_ENUM,
7876 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7877>(
7878 src: __m128d,
7879 k: __mmask8,
7880 a: __m128d,
7881) -> __m128d {
7882 unsafe {
7883 static_assert_uimm_bits!(NORM, 4);
7884 static_assert_uimm_bits!(SIGN, 2);
7885 let a: Simd = a.as_f64x2();
7886 let src: Simd = src.as_f64x2();
7887 let r: Simd = vgetmantpd128(a, SIGN << 2 | NORM, src, m:k);
7888 transmute(src:r)
7889 }
7890}
7891
7892/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7893/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7894/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7895/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7896/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7897/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7898/// The sign is determined by sc which can take the following values:\
7899/// _MM_MANT_SIGN_src // sign = sign(src)\
7900/// _MM_MANT_SIGN_zero // sign = 0\
7901/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7902///
7903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7904#[inline]
7905#[target_feature(enable = "avx512f,avx512vl")]
7906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7907#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7908#[rustc_legacy_const_generics(2, 3)]
7909pub fn _mm_maskz_getmant_pd<
7910 const NORM: _MM_MANTISSA_NORM_ENUM,
7911 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7912>(
7913 k: __mmask8,
7914 a: __m128d,
7915) -> __m128d {
7916 unsafe {
7917 static_assert_uimm_bits!(NORM, 4);
7918 static_assert_uimm_bits!(SIGN, 2);
7919 let a: Simd = a.as_f64x2();
7920 let r: Simd = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:k);
7921 transmute(src:r)
7922 }
7923}
7924
7925/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7926///
7927/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7928/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7929/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7930/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7931/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7932/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7933///
7934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7935#[inline]
7936#[target_feature(enable = "avx512f")]
7937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7938#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7939#[rustc_legacy_const_generics(2)]
7940pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7941 unsafe {
7942 static_assert_rounding!(ROUNDING);
7943 let a: Simd = a.as_f32x16();
7944 let b: Simd = b.as_f32x16();
7945 let r: Simd = vaddps(a, b, ROUNDING);
7946 transmute(src:r)
7947 }
7948}
7949
7950/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7951///
7952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7958///
7959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7960#[inline]
7961#[target_feature(enable = "avx512f")]
7962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7963#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7964#[rustc_legacy_const_generics(4)]
7965pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7966 src: __m512,
7967 k: __mmask16,
7968 a: __m512,
7969 b: __m512,
7970) -> __m512 {
7971 unsafe {
7972 static_assert_rounding!(ROUNDING);
7973 let a: Simd = a.as_f32x16();
7974 let b: Simd = b.as_f32x16();
7975 let r: Simd = vaddps(a, b, ROUNDING);
7976 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
7977 }
7978}
7979
7980/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(3)]
7995pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7996 k: __mmask16,
7997 a: __m512,
7998 b: __m512,
7999) -> __m512 {
8000 unsafe {
8001 static_assert_rounding!(ROUNDING);
8002 let a: Simd = a.as_f32x16();
8003 let b: Simd = b.as_f32x16();
8004 let r: Simd = vaddps(a, b, ROUNDING);
8005 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8006 }
8007}
8008
8009/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8010///
8011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8017///
8018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
8019#[inline]
8020#[target_feature(enable = "avx512f")]
8021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8022#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8023#[rustc_legacy_const_generics(2)]
8024pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8025 unsafe {
8026 static_assert_rounding!(ROUNDING);
8027 let a: Simd = a.as_f64x8();
8028 let b: Simd = b.as_f64x8();
8029 let r: Simd = vaddpd(a, b, ROUNDING);
8030 transmute(src:r)
8031 }
8032}
8033
8034/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8035///
8036/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8037/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8038/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8039/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8040/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8041/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8042///
8043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
8044#[inline]
8045#[target_feature(enable = "avx512f")]
8046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8047#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8048#[rustc_legacy_const_generics(4)]
8049pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
8050 src: __m512d,
8051 k: __mmask8,
8052 a: __m512d,
8053 b: __m512d,
8054) -> __m512d {
8055 unsafe {
8056 static_assert_rounding!(ROUNDING);
8057 let a: Simd = a.as_f64x8();
8058 let b: Simd = b.as_f64x8();
8059 let r: Simd = vaddpd(a, b, ROUNDING);
8060 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8061 }
8062}
8063
8064/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(3)]
8079pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
8080 k: __mmask8,
8081 a: __m512d,
8082 b: __m512d,
8083) -> __m512d {
8084 unsafe {
8085 static_assert_rounding!(ROUNDING);
8086 let a: Simd = a.as_f64x8();
8087 let b: Simd = b.as_f64x8();
8088 let r: Simd = vaddpd(a, b, ROUNDING);
8089 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8090 }
8091}
8092
8093/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8094///
8095/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8096/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8097/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8098/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8099/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8100/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8101///
8102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
8103#[inline]
8104#[target_feature(enable = "avx512f")]
8105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8106#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8107#[rustc_legacy_const_generics(2)]
8108pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8109 unsafe {
8110 static_assert_rounding!(ROUNDING);
8111 let a: Simd = a.as_f32x16();
8112 let b: Simd = b.as_f32x16();
8113 let r: Simd = vsubps(a, b, ROUNDING);
8114 transmute(src:r)
8115 }
8116}
8117
8118/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8119///
8120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8126///
8127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
8128#[inline]
8129#[target_feature(enable = "avx512f")]
8130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8131#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8132#[rustc_legacy_const_generics(4)]
8133pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
8134 src: __m512,
8135 k: __mmask16,
8136 a: __m512,
8137 b: __m512,
8138) -> __m512 {
8139 unsafe {
8140 static_assert_rounding!(ROUNDING);
8141 let a: Simd = a.as_f32x16();
8142 let b: Simd = b.as_f32x16();
8143 let r: Simd = vsubps(a, b, ROUNDING);
8144 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8145 }
8146}
8147
8148/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(3)]
8163pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
8164 k: __mmask16,
8165 a: __m512,
8166 b: __m512,
8167) -> __m512 {
8168 unsafe {
8169 static_assert_rounding!(ROUNDING);
8170 let a: Simd = a.as_f32x16();
8171 let b: Simd = b.as_f32x16();
8172 let r: Simd = vsubps(a, b, ROUNDING);
8173 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8174 }
8175}
8176
8177/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8178///
8179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8185///
8186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
8187#[inline]
8188#[target_feature(enable = "avx512f")]
8189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8190#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8191#[rustc_legacy_const_generics(2)]
8192pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8193 unsafe {
8194 static_assert_rounding!(ROUNDING);
8195 let a: Simd = a.as_f64x8();
8196 let b: Simd = b.as_f64x8();
8197 let r: Simd = vsubpd(a, b, ROUNDING);
8198 transmute(src:r)
8199 }
8200}
8201
8202/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8203///
8204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8210///
8211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
8212#[inline]
8213#[target_feature(enable = "avx512f")]
8214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8215#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8216#[rustc_legacy_const_generics(4)]
8217pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
8218 src: __m512d,
8219 k: __mmask8,
8220 a: __m512d,
8221 b: __m512d,
8222) -> __m512d {
8223 unsafe {
8224 static_assert_rounding!(ROUNDING);
8225 let a: Simd = a.as_f64x8();
8226 let b: Simd = b.as_f64x8();
8227 let r: Simd = vsubpd(a, b, ROUNDING);
8228 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8229 }
8230}
8231
8232/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(3)]
8247pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
8248 k: __mmask8,
8249 a: __m512d,
8250 b: __m512d,
8251) -> __m512d {
8252 unsafe {
8253 static_assert_rounding!(ROUNDING);
8254 let a: Simd = a.as_f64x8();
8255 let b: Simd = b.as_f64x8();
8256 let r: Simd = vsubpd(a, b, ROUNDING);
8257 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8258 }
8259}
8260
8261/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
8262///
8263/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8264/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8265/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8266/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8267/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8268/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8269///
8270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
8271#[inline]
8272#[target_feature(enable = "avx512f")]
8273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8274#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8275#[rustc_legacy_const_generics(2)]
8276pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8277 unsafe {
8278 static_assert_rounding!(ROUNDING);
8279 let a: Simd = a.as_f32x16();
8280 let b: Simd = b.as_f32x16();
8281 let r: Simd = vmulps(a, b, ROUNDING);
8282 transmute(src:r)
8283 }
8284}
8285
8286/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8287///
8288/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8289/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8290/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8291/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8292/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8293/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8296#[inline]
8297#[target_feature(enable = "avx512f")]
8298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8299#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8300#[rustc_legacy_const_generics(4)]
8301pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8302 src: __m512,
8303 k: __mmask16,
8304 a: __m512,
8305 b: __m512,
8306) -> __m512 {
8307 unsafe {
8308 static_assert_rounding!(ROUNDING);
8309 let a: Simd = a.as_f32x16();
8310 let b: Simd = b.as_f32x16();
8311 let r: Simd = vmulps(a, b, ROUNDING);
8312 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8313 }
8314}
8315
8316/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(3)]
8331pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8332 k: __mmask16,
8333 a: __m512,
8334 b: __m512,
8335) -> __m512 {
8336 unsafe {
8337 static_assert_rounding!(ROUNDING);
8338 let a: Simd = a.as_f32x16();
8339 let b: Simd = b.as_f32x16();
8340 let r: Simd = vmulps(a, b, ROUNDING);
8341 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8342 }
8343}
8344
8345/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8346///
8347/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8348/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8349/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8350/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8351/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8352/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8353///
8354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8355#[inline]
8356#[target_feature(enable = "avx512f")]
8357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8358#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8359#[rustc_legacy_const_generics(2)]
8360pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8361 unsafe {
8362 static_assert_rounding!(ROUNDING);
8363 let a: Simd = a.as_f64x8();
8364 let b: Simd = b.as_f64x8();
8365 let r: Simd = vmulpd(a, b, ROUNDING);
8366 transmute(src:r)
8367 }
8368}
8369
8370/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8371///
8372/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8373/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8374/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8375/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8376/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8378///
8379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8380#[inline]
8381#[target_feature(enable = "avx512f")]
8382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8383#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8384#[rustc_legacy_const_generics(4)]
8385pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8386 src: __m512d,
8387 k: __mmask8,
8388 a: __m512d,
8389 b: __m512d,
8390) -> __m512d {
8391 unsafe {
8392 static_assert_rounding!(ROUNDING);
8393 let a: Simd = a.as_f64x8();
8394 let b: Simd = b.as_f64x8();
8395 let r: Simd = vmulpd(a, b, ROUNDING);
8396 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8397 }
8398}
8399
8400/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8401///
8402/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8403/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8404/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8405/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8406/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8408///
8409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8410#[inline]
8411#[target_feature(enable = "avx512f")]
8412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8413#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8414#[rustc_legacy_const_generics(3)]
8415pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8416 k: __mmask8,
8417 a: __m512d,
8418 b: __m512d,
8419) -> __m512d {
8420 unsafe {
8421 static_assert_rounding!(ROUNDING);
8422 let a: Simd = a.as_f64x8();
8423 let b: Simd = b.as_f64x8();
8424 let r: Simd = vmulpd(a, b, ROUNDING);
8425 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8426 }
8427}
8428
8429/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8430///
8431/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8432/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8433/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8434/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8435/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8436/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8437///
8438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
8439#[inline]
8440#[target_feature(enable = "avx512f")]
8441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8442#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8443#[rustc_legacy_const_generics(2)]
8444pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8445 unsafe {
8446 static_assert_rounding!(ROUNDING);
8447 let a: Simd = a.as_f32x16();
8448 let b: Simd = b.as_f32x16();
8449 let r: Simd = vdivps(a, b, ROUNDING);
8450 transmute(src:r)
8451 }
8452}
8453
8454/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8455///
8456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8462///
8463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8464#[inline]
8465#[target_feature(enable = "avx512f")]
8466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8467#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8468#[rustc_legacy_const_generics(4)]
8469pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8470 src: __m512,
8471 k: __mmask16,
8472 a: __m512,
8473 b: __m512,
8474) -> __m512 {
8475 unsafe {
8476 static_assert_rounding!(ROUNDING);
8477 let a: Simd = a.as_f32x16();
8478 let b: Simd = b.as_f32x16();
8479 let r: Simd = vdivps(a, b, ROUNDING);
8480 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8481 }
8482}
8483
8484/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8485///
8486/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8487/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8488/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8489/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8490/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8491/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8492///
8493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8494#[inline]
8495#[target_feature(enable = "avx512f")]
8496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8497#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8498#[rustc_legacy_const_generics(3)]
8499pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8500 k: __mmask16,
8501 a: __m512,
8502 b: __m512,
8503) -> __m512 {
8504 unsafe {
8505 static_assert_rounding!(ROUNDING);
8506 let a: Simd = a.as_f32x16();
8507 let b: Simd = b.as_f32x16();
8508 let r: Simd = vdivps(a, b, ROUNDING);
8509 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8510 }
8511}
8512
8513/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, =and store the results in dst.\
8514///
8515/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8516/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8517/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8518/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8519/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8520/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8521///
8522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8523#[inline]
8524#[target_feature(enable = "avx512f")]
8525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8526#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8527#[rustc_legacy_const_generics(2)]
8528pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8529 unsafe {
8530 static_assert_rounding!(ROUNDING);
8531 let a: Simd = a.as_f64x8();
8532 let b: Simd = b.as_f64x8();
8533 let r: Simd = vdivpd(a, b, ROUNDING);
8534 transmute(src:r)
8535 }
8536}
8537
8538/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8539///
8540/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8546///
8547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8548#[inline]
8549#[target_feature(enable = "avx512f")]
8550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8551#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8552#[rustc_legacy_const_generics(4)]
8553pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8554 src: __m512d,
8555 k: __mmask8,
8556 a: __m512d,
8557 b: __m512d,
8558) -> __m512d {
8559 unsafe {
8560 static_assert_rounding!(ROUNDING);
8561 let a: Simd = a.as_f64x8();
8562 let b: Simd = b.as_f64x8();
8563 let r: Simd = vdivpd(a, b, ROUNDING);
8564 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8565 }
8566}
8567
8568/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8569///
8570/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8571/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8572/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8573/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8574/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8575/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8576///
8577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8578#[inline]
8579#[target_feature(enable = "avx512f")]
8580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8581#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8582#[rustc_legacy_const_generics(3)]
8583pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8584 k: __mmask8,
8585 a: __m512d,
8586 b: __m512d,
8587) -> __m512d {
8588 unsafe {
8589 static_assert_rounding!(ROUNDING);
8590 let a: Simd = a.as_f64x8();
8591 let b: Simd = b.as_f64x8();
8592 let r: Simd = vdivpd(a, b, ROUNDING);
8593 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8594 }
8595}
8596
8597/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8598///
8599/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8600/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8601/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8602/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8603/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8604/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8605///
8606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8607#[inline]
8608#[target_feature(enable = "avx512f")]
8609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8610#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8611#[rustc_legacy_const_generics(1)]
8612pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8613 unsafe {
8614 static_assert_rounding!(ROUNDING);
8615 let a: Simd = a.as_f32x16();
8616 let r: Simd = vsqrtps(a, ROUNDING);
8617 transmute(src:r)
8618 }
8619}
8620
8621/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8622///
8623/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8624/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8625/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8626/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8627/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8628/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8629///
8630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8631#[inline]
8632#[target_feature(enable = "avx512f")]
8633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8634#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8635#[rustc_legacy_const_generics(3)]
8636pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8637 src: __m512,
8638 k: __mmask16,
8639 a: __m512,
8640) -> __m512 {
8641 unsafe {
8642 static_assert_rounding!(ROUNDING);
8643 let a: Simd = a.as_f32x16();
8644 let r: Simd = vsqrtps(a, ROUNDING);
8645 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8646 }
8647}
8648
8649/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8650///
8651/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8652/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8653/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8654/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8655/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8656/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8657///
8658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8659#[inline]
8660#[target_feature(enable = "avx512f")]
8661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8662#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8663#[rustc_legacy_const_generics(2)]
8664pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8665 unsafe {
8666 static_assert_rounding!(ROUNDING);
8667 let a: Simd = a.as_f32x16();
8668 let r: Simd = vsqrtps(a, ROUNDING);
8669 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8670 }
8671}
8672
8673/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8674///
8675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8681///
8682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8683#[inline]
8684#[target_feature(enable = "avx512f")]
8685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8686#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8687#[rustc_legacy_const_generics(1)]
8688pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8689 unsafe {
8690 static_assert_rounding!(ROUNDING);
8691 let a: Simd = a.as_f64x8();
8692 let r: Simd = vsqrtpd(a, ROUNDING);
8693 transmute(src:r)
8694 }
8695}
8696
8697/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8698///
8699/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8700/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8701/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8702/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8703/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8704/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8705///
8706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8707#[inline]
8708#[target_feature(enable = "avx512f")]
8709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8710#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8711#[rustc_legacy_const_generics(3)]
8712pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8713 src: __m512d,
8714 k: __mmask8,
8715 a: __m512d,
8716) -> __m512d {
8717 unsafe {
8718 static_assert_rounding!(ROUNDING);
8719 let a: Simd = a.as_f64x8();
8720 let r: Simd = vsqrtpd(a, ROUNDING);
8721 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8722 }
8723}
8724
8725/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8726///
8727/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8728/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8729/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8730/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8731/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8732/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8733///
8734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8735#[inline]
8736#[target_feature(enable = "avx512f")]
8737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8738#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8739#[rustc_legacy_const_generics(2)]
8740pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8741 unsafe {
8742 static_assert_rounding!(ROUNDING);
8743 let a: Simd = a.as_f64x8();
8744 let r: Simd = vsqrtpd(a, ROUNDING);
8745 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8746 }
8747}
8748
8749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8750///
8751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8757///
8758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
8759#[inline]
8760#[target_feature(enable = "avx512f")]
8761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8762#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8763#[rustc_legacy_const_generics(3)]
8764pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8765 unsafe {
8766 static_assert_rounding!(ROUNDING);
8767 vfmadd132psround(a, b, c, ROUNDING)
8768 }
8769}
8770
8771/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8772///
8773/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8774/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8775/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8776/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8777/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8778/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8779///
8780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8781#[inline]
8782#[target_feature(enable = "avx512f")]
8783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8784#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8785#[rustc_legacy_const_generics(4)]
8786pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8787 a: __m512,
8788 k: __mmask16,
8789 b: __m512,
8790 c: __m512,
8791) -> __m512 {
8792 unsafe {
8793 static_assert_rounding!(ROUNDING);
8794 simd_select_bitmask(m:k, yes:vfmadd132psround(a, b, c, ROUNDING), no:a)
8795 }
8796}
8797
8798/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in a using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8799///
8800/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8801/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8802/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8803/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8804/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8805/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8806///
8807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8808#[inline]
8809#[target_feature(enable = "avx512f")]
8810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8811#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8812#[rustc_legacy_const_generics(4)]
8813pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8814 k: __mmask16,
8815 a: __m512,
8816 b: __m512,
8817 c: __m512,
8818) -> __m512 {
8819 unsafe {
8820 static_assert_rounding!(ROUNDING);
8821 simd_select_bitmask(m:k, yes:vfmadd132psround(a, b, c, ROUNDING), no:_mm512_setzero_ps())
8822 }
8823}
8824
8825/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8826///
8827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8833///
8834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
8835#[inline]
8836#[target_feature(enable = "avx512f")]
8837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8839#[rustc_legacy_const_generics(4)]
8840pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8841 a: __m512,
8842 b: __m512,
8843 c: __m512,
8844 k: __mmask16,
8845) -> __m512 {
8846 unsafe {
8847 static_assert_rounding!(ROUNDING);
8848 simd_select_bitmask(m:k, yes:vfmadd132psround(a, b, c, ROUNDING), no:c)
8849 }
8850}
8851
8852/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8853///
8854/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8855/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8856/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8857/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8858/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8859/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8860///
8861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8862#[inline]
8863#[target_feature(enable = "avx512f")]
8864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8865#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8866#[rustc_legacy_const_generics(3)]
8867pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8868 unsafe {
8869 static_assert_rounding!(ROUNDING);
8870 vfmadd132pdround(a, b, c, ROUNDING)
8871 }
8872}
8873
8874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8875///
8876/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8877/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8878/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8879/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8880/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8881/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8882///
8883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8884#[inline]
8885#[target_feature(enable = "avx512f")]
8886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8887#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8888#[rustc_legacy_const_generics(4)]
8889pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8890 a: __m512d,
8891 k: __mmask8,
8892 b: __m512d,
8893 c: __m512d,
8894) -> __m512d {
8895 unsafe {
8896 static_assert_rounding!(ROUNDING);
8897 simd_select_bitmask(m:k, yes:vfmadd132pdround(a, b, c, ROUNDING), no:a)
8898 }
8899}
8900
8901/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8902///
8903/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8909///
8910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8911#[inline]
8912#[target_feature(enable = "avx512f")]
8913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8914#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8915#[rustc_legacy_const_generics(4)]
8916pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8917 k: __mmask8,
8918 a: __m512d,
8919 b: __m512d,
8920 c: __m512d,
8921) -> __m512d {
8922 unsafe {
8923 static_assert_rounding!(ROUNDING);
8924 simd_select_bitmask(m:k, yes:vfmadd132pdround(a, b, c, ROUNDING), no:_mm512_setzero_pd())
8925 }
8926}
8927
8928/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8929///
8930/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8931/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8932/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8933/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8934/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8935/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8936///
8937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8938#[inline]
8939#[target_feature(enable = "avx512f")]
8940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8941#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8942#[rustc_legacy_const_generics(4)]
8943pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8944 a: __m512d,
8945 b: __m512d,
8946 c: __m512d,
8947 k: __mmask8,
8948) -> __m512d {
8949 unsafe {
8950 static_assert_rounding!(ROUNDING);
8951 simd_select_bitmask(m:k, yes:vfmadd132pdround(a, b, c, ROUNDING), no:c)
8952 }
8953}
8954
8955/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8956///
8957/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8958/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8959/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8960/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8961/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8962/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8963///
8964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8965#[inline]
8966#[target_feature(enable = "avx512f")]
8967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8968#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8969#[rustc_legacy_const_generics(3)]
8970pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8971 unsafe {
8972 static_assert_rounding!(ROUNDING);
8973 vfmadd132psround(a, b, c:simd_neg(c), ROUNDING)
8974 }
8975}
8976
8977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8978///
8979/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8980/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8981/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8982/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8983/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8984/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8985///
8986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8987#[inline]
8988#[target_feature(enable = "avx512f")]
8989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8990#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8991#[rustc_legacy_const_generics(4)]
8992pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8993 a: __m512,
8994 k: __mmask16,
8995 b: __m512,
8996 c: __m512,
8997) -> __m512 {
8998 unsafe {
8999 static_assert_rounding!(ROUNDING);
9000 let r: __m512 = vfmadd132psround(a, b, c:simd_neg(c), ROUNDING);
9001 simd_select_bitmask(m:k, yes:r, no:a)
9002 }
9003}
9004
9005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9006///
9007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9013///
9014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
9015#[inline]
9016#[target_feature(enable = "avx512f")]
9017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9018#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9019#[rustc_legacy_const_generics(4)]
9020pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
9021 k: __mmask16,
9022 a: __m512,
9023 b: __m512,
9024 c: __m512,
9025) -> __m512 {
9026 unsafe {
9027 static_assert_rounding!(ROUNDING);
9028 let r: __m512 = vfmadd132psround(a, b, c:simd_neg(c), ROUNDING);
9029 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9030 }
9031}
9032
9033/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9034///
9035/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9036/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9037/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9038/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9039/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9040/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9041///
9042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
9043#[inline]
9044#[target_feature(enable = "avx512f")]
9045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9046#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9047#[rustc_legacy_const_generics(4)]
9048pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
9049 a: __m512,
9050 b: __m512,
9051 c: __m512,
9052 k: __mmask16,
9053) -> __m512 {
9054 unsafe {
9055 static_assert_rounding!(ROUNDING);
9056 let r: __m512 = vfmadd132psround(a, b, c:simd_neg(c), ROUNDING);
9057 simd_select_bitmask(m:k, yes:r, no:c)
9058 }
9059}
9060
9061/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
9062///
9063/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9064/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9065/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9066/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9067/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9068/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9069///
9070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
9071#[inline]
9072#[target_feature(enable = "avx512f")]
9073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9074#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9075#[rustc_legacy_const_generics(3)]
9076pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9077 unsafe {
9078 static_assert_rounding!(ROUNDING);
9079 vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING)
9080 }
9081}
9082
9083/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9084///
9085/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9086/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9087/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9088/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9089/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9091///
9092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
9093#[inline]
9094#[target_feature(enable = "avx512f")]
9095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9096#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9097#[rustc_legacy_const_generics(4)]
9098pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
9099 a: __m512d,
9100 k: __mmask8,
9101 b: __m512d,
9102 c: __m512d,
9103) -> __m512d {
9104 unsafe {
9105 static_assert_rounding!(ROUNDING);
9106 let r: __m512d = vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING);
9107 simd_select_bitmask(m:k, yes:r, no:a)
9108 }
9109}
9110
9111/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9112///
9113/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9114/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9115/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9116/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9117/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9118/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9119///
9120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
9121#[inline]
9122#[target_feature(enable = "avx512f")]
9123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9124#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9125#[rustc_legacy_const_generics(4)]
9126pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
9127 k: __mmask8,
9128 a: __m512d,
9129 b: __m512d,
9130 c: __m512d,
9131) -> __m512d {
9132 unsafe {
9133 static_assert_rounding!(ROUNDING);
9134 let r: __m512d = vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING);
9135 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9136 }
9137}
9138
9139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9140///
9141/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9142/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9143/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9144/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9145/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9147///
9148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
9149#[inline]
9150#[target_feature(enable = "avx512f")]
9151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9152#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9153#[rustc_legacy_const_generics(4)]
9154pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
9155 a: __m512d,
9156 b: __m512d,
9157 c: __m512d,
9158 k: __mmask8,
9159) -> __m512d {
9160 unsafe {
9161 static_assert_rounding!(ROUNDING);
9162 let r: __m512d = vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING);
9163 simd_select_bitmask(m:k, yes:r, no:c)
9164 }
9165}
9166
9167/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
9168///
9169/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9170/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9171/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9172/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9173/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9174/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9175///
9176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
9177#[inline]
9178#[target_feature(enable = "avx512f")]
9179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9180#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9181#[rustc_legacy_const_generics(3)]
9182pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9183 unsafe {
9184 static_assert_rounding!(ROUNDING);
9185 vfmaddsubpsround(a, b, c, ROUNDING)
9186 }
9187}
9188
9189/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9190///
9191/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9192/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9193/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9194/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9195/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9196/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9197///
9198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
9199#[inline]
9200#[target_feature(enable = "avx512f")]
9201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9202#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9203#[rustc_legacy_const_generics(4)]
9204pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
9205 a: __m512,
9206 k: __mmask16,
9207 b: __m512,
9208 c: __m512,
9209) -> __m512 {
9210 unsafe {
9211 static_assert_rounding!(ROUNDING);
9212 simd_select_bitmask(m:k, yes:vfmaddsubpsround(a, b, c, ROUNDING), no:a)
9213 }
9214}
9215
9216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9217///
9218/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9224///
9225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
9226#[inline]
9227#[target_feature(enable = "avx512f")]
9228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9229#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9230#[rustc_legacy_const_generics(4)]
9231pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
9232 k: __mmask16,
9233 a: __m512,
9234 b: __m512,
9235 c: __m512,
9236) -> __m512 {
9237 unsafe {
9238 static_assert_rounding!(ROUNDING);
9239 simd_select_bitmask(m:k, yes:vfmaddsubpsround(a, b, c, ROUNDING), no:_mm512_setzero_ps())
9240 }
9241}
9242
9243/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9244///
9245/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9246/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9247/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9248/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9249/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9250/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9251///
9252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
9253#[inline]
9254#[target_feature(enable = "avx512f")]
9255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9256#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9257#[rustc_legacy_const_generics(4)]
9258pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
9259 a: __m512,
9260 b: __m512,
9261 c: __m512,
9262 k: __mmask16,
9263) -> __m512 {
9264 unsafe {
9265 static_assert_rounding!(ROUNDING);
9266 simd_select_bitmask(m:k, yes:vfmaddsubpsround(a, b, c, ROUNDING), no:c)
9267 }
9268}
9269
9270/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
9271///
9272/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9273/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9274/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9275/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9276/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9277/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9278///
9279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
9280#[inline]
9281#[target_feature(enable = "avx512f")]
9282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9283#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9284#[rustc_legacy_const_generics(3)]
9285pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9286 a: __m512d,
9287 b: __m512d,
9288 c: __m512d,
9289) -> __m512d {
9290 unsafe {
9291 static_assert_rounding!(ROUNDING);
9292 vfmaddsubpdround(a, b, c, ROUNDING)
9293 }
9294}
9295
9296/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9297///
9298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9304///
9305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9306#[inline]
9307#[target_feature(enable = "avx512f")]
9308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9309#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9310#[rustc_legacy_const_generics(4)]
9311pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9312 a: __m512d,
9313 k: __mmask8,
9314 b: __m512d,
9315 c: __m512d,
9316) -> __m512d {
9317 unsafe {
9318 static_assert_rounding!(ROUNDING);
9319 simd_select_bitmask(m:k, yes:vfmaddsubpdround(a, b, c, ROUNDING), no:a)
9320 }
9321}
9322
9323/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9324///
9325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9331///
9332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9333#[inline]
9334#[target_feature(enable = "avx512f")]
9335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9336#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9337#[rustc_legacy_const_generics(4)]
9338pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9339 k: __mmask8,
9340 a: __m512d,
9341 b: __m512d,
9342 c: __m512d,
9343) -> __m512d {
9344 unsafe {
9345 static_assert_rounding!(ROUNDING);
9346 simd_select_bitmask(m:k, yes:vfmaddsubpdround(a, b, c, ROUNDING), no:_mm512_setzero_pd())
9347 }
9348}
9349
9350/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9351///
9352/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9353/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9354/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9355/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9356/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9358///
9359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9360#[inline]
9361#[target_feature(enable = "avx512f")]
9362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9363#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9364#[rustc_legacy_const_generics(4)]
9365pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9366 a: __m512d,
9367 b: __m512d,
9368 c: __m512d,
9369 k: __mmask8,
9370) -> __m512d {
9371 unsafe {
9372 static_assert_rounding!(ROUNDING);
9373 simd_select_bitmask(m:k, yes:vfmaddsubpdround(a, b, c, ROUNDING), no:c)
9374 }
9375}
9376
9377/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9378///
9379/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9380/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9381/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9382/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9383/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9384/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9385///
9386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9387#[inline]
9388#[target_feature(enable = "avx512f")]
9389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9390#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9391#[rustc_legacy_const_generics(3)]
9392pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9393 unsafe {
9394 static_assert_rounding!(ROUNDING);
9395 vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING)
9396 }
9397}
9398
9399/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9400///
9401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9407///
9408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9409#[inline]
9410#[target_feature(enable = "avx512f")]
9411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9412#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9413#[rustc_legacy_const_generics(4)]
9414pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9415 a: __m512,
9416 k: __mmask16,
9417 b: __m512,
9418 c: __m512,
9419) -> __m512 {
9420 unsafe {
9421 static_assert_rounding!(ROUNDING);
9422 let r: __m512 = vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING);
9423 simd_select_bitmask(m:k, yes:r, no:a)
9424 }
9425}
9426
9427/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9428///
9429/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9430/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9431/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9432/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9433/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9434/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9435///
9436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9437#[inline]
9438#[target_feature(enable = "avx512f")]
9439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9440#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9441#[rustc_legacy_const_generics(4)]
9442pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9443 k: __mmask16,
9444 a: __m512,
9445 b: __m512,
9446 c: __m512,
9447) -> __m512 {
9448 unsafe {
9449 static_assert_rounding!(ROUNDING);
9450 let r: __m512 = vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING);
9451 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9452 }
9453}
9454
9455/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9456///
9457/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9458/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9459/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9460/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9461/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9462/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9463///
9464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9465#[inline]
9466#[target_feature(enable = "avx512f")]
9467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9468#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9469#[rustc_legacy_const_generics(4)]
9470pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9471 a: __m512,
9472 b: __m512,
9473 c: __m512,
9474 k: __mmask16,
9475) -> __m512 {
9476 unsafe {
9477 static_assert_rounding!(ROUNDING);
9478 let r: __m512 = vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING);
9479 simd_select_bitmask(m:k, yes:r, no:c)
9480 }
9481}
9482
9483/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9484///
9485/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9486/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9487/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9488/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9489/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9490/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9491///
9492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9493#[inline]
9494#[target_feature(enable = "avx512f")]
9495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9496#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9497#[rustc_legacy_const_generics(3)]
9498pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9499 a: __m512d,
9500 b: __m512d,
9501 c: __m512d,
9502) -> __m512d {
9503 unsafe {
9504 static_assert_rounding!(ROUNDING);
9505 vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING)
9506 }
9507}
9508
9509/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9510///
9511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9517///
9518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9519#[inline]
9520#[target_feature(enable = "avx512f")]
9521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9522#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9523#[rustc_legacy_const_generics(4)]
9524pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9525 a: __m512d,
9526 k: __mmask8,
9527 b: __m512d,
9528 c: __m512d,
9529) -> __m512d {
9530 unsafe {
9531 static_assert_rounding!(ROUNDING);
9532 let r: __m512d = vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING);
9533 simd_select_bitmask(m:k, yes:r, no:a)
9534 }
9535}
9536
9537/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9538///
9539/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9545///
9546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9547#[inline]
9548#[target_feature(enable = "avx512f")]
9549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9550#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9551#[rustc_legacy_const_generics(4)]
9552pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9553 k: __mmask8,
9554 a: __m512d,
9555 b: __m512d,
9556 c: __m512d,
9557) -> __m512d {
9558 unsafe {
9559 static_assert_rounding!(ROUNDING);
9560 let r: __m512d = vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING);
9561 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9562 }
9563}
9564
9565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9566///
9567/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9573///
9574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9575#[inline]
9576#[target_feature(enable = "avx512f")]
9577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9578#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9579#[rustc_legacy_const_generics(4)]
9580pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9581 a: __m512d,
9582 b: __m512d,
9583 c: __m512d,
9584 k: __mmask8,
9585) -> __m512d {
9586 unsafe {
9587 static_assert_rounding!(ROUNDING);
9588 let r: __m512d = vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING);
9589 simd_select_bitmask(m:k, yes:r, no:c)
9590 }
9591}
9592
9593/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9594///
9595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9601///
9602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9603#[inline]
9604#[target_feature(enable = "avx512f")]
9605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9606#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9607#[rustc_legacy_const_generics(3)]
9608pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9609 unsafe {
9610 static_assert_rounding!(ROUNDING);
9611 vfmadd132psround(a:simd_neg(a), b, c, ROUNDING)
9612 }
9613}
9614
9615/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9616///
9617/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9623///
9624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9625#[inline]
9626#[target_feature(enable = "avx512f")]
9627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9628#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9629#[rustc_legacy_const_generics(4)]
9630pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9631 a: __m512,
9632 k: __mmask16,
9633 b: __m512,
9634 c: __m512,
9635) -> __m512 {
9636 unsafe {
9637 static_assert_rounding!(ROUNDING);
9638 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c, ROUNDING);
9639 simd_select_bitmask(m:k, yes:r, no:a)
9640 }
9641}
9642
9643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9644///
9645/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9646/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9647/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9648/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9649/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9650/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9651///
9652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9653#[inline]
9654#[target_feature(enable = "avx512f")]
9655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9656#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9657#[rustc_legacy_const_generics(4)]
9658pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9659 k: __mmask16,
9660 a: __m512,
9661 b: __m512,
9662 c: __m512,
9663) -> __m512 {
9664 unsafe {
9665 static_assert_rounding!(ROUNDING);
9666 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c, ROUNDING);
9667 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9668 }
9669}
9670
9671/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9672///
9673/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9674/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9675/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9676/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9677/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9678/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9679///
9680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9681#[inline]
9682#[target_feature(enable = "avx512f")]
9683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9684#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9685#[rustc_legacy_const_generics(4)]
9686pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9687 a: __m512,
9688 b: __m512,
9689 c: __m512,
9690 k: __mmask16,
9691) -> __m512 {
9692 unsafe {
9693 static_assert_rounding!(ROUNDING);
9694 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c, ROUNDING);
9695 simd_select_bitmask(m:k, yes:r, no:c)
9696 }
9697}
9698
9699/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9700///
9701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9707///
9708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9709#[inline]
9710#[target_feature(enable = "avx512f")]
9711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9712#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9713#[rustc_legacy_const_generics(3)]
9714pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9715 unsafe {
9716 static_assert_rounding!(ROUNDING);
9717 vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING)
9718 }
9719}
9720
9721/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9722///
9723/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9724/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9725/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9726/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9727/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9728/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9729///
9730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9731#[inline]
9732#[target_feature(enable = "avx512f")]
9733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9734#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9735#[rustc_legacy_const_generics(4)]
9736pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9737 a: __m512d,
9738 k: __mmask8,
9739 b: __m512d,
9740 c: __m512d,
9741) -> __m512d {
9742 unsafe {
9743 static_assert_rounding!(ROUNDING);
9744 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING);
9745 simd_select_bitmask(m:k, yes:r, no:a)
9746 }
9747}
9748
9749/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9750///
9751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9762#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9765 k: __mmask8,
9766 a: __m512d,
9767 b: __m512d,
9768 c: __m512d,
9769) -> __m512d {
9770 unsafe {
9771 static_assert_rounding!(ROUNDING);
9772 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING);
9773 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9774 }
9775}
9776
9777/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9778///
9779/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9780/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9781/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9782/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9783/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9784/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9785///
9786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9787#[inline]
9788#[target_feature(enable = "avx512f")]
9789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9790#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9791#[rustc_legacy_const_generics(4)]
9792pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9793 a: __m512d,
9794 b: __m512d,
9795 c: __m512d,
9796 k: __mmask8,
9797) -> __m512d {
9798 unsafe {
9799 static_assert_rounding!(ROUNDING);
9800 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING);
9801 simd_select_bitmask(m:k, yes:r, no:c)
9802 }
9803}
9804
9805/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9806///
9807/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9808/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9809/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9810/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9811/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9812/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9813///
9814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9815#[inline]
9816#[target_feature(enable = "avx512f")]
9817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9818#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9819#[rustc_legacy_const_generics(3)]
9820pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9821 unsafe {
9822 static_assert_rounding!(ROUNDING);
9823 vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING)
9824 }
9825}
9826
9827/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9828///
9829/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9830/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9831/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9832/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9833/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9834/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9835///
9836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9837#[inline]
9838#[target_feature(enable = "avx512f")]
9839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9840#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9841#[rustc_legacy_const_generics(4)]
9842pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9843 a: __m512,
9844 k: __mmask16,
9845 b: __m512,
9846 c: __m512,
9847) -> __m512 {
9848 unsafe {
9849 static_assert_rounding!(ROUNDING);
9850 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9851 simd_select_bitmask(m:k, yes:r, no:a)
9852 }
9853}
9854
9855/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9856///
9857/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9858/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9859/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9860/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9861/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9862/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9863///
9864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9865#[inline]
9866#[target_feature(enable = "avx512f")]
9867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9868#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9869#[rustc_legacy_const_generics(4)]
9870pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9871 k: __mmask16,
9872 a: __m512,
9873 b: __m512,
9874 c: __m512,
9875) -> __m512 {
9876 unsafe {
9877 static_assert_rounding!(ROUNDING);
9878 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9879 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9880 }
9881}
9882
9883/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9884///
9885/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9886/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9887/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9888/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9889/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9890/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9891///
9892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9893#[inline]
9894#[target_feature(enable = "avx512f")]
9895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9896#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9897#[rustc_legacy_const_generics(4)]
9898pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9899 a: __m512,
9900 b: __m512,
9901 c: __m512,
9902 k: __mmask16,
9903) -> __m512 {
9904 unsafe {
9905 static_assert_rounding!(ROUNDING);
9906 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9907 simd_select_bitmask(m:k, yes:r, no:c)
9908 }
9909}
9910
9911/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9912///
9913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9919///
9920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9921#[inline]
9922#[target_feature(enable = "avx512f")]
9923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9924#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9925#[rustc_legacy_const_generics(3)]
9926pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9927 unsafe {
9928 static_assert_rounding!(ROUNDING);
9929 vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING)
9930 }
9931}
9932
9933/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9934///
9935/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9936/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9937/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9938/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9939/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9940/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9941///
9942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9943#[inline]
9944#[target_feature(enable = "avx512f")]
9945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9946#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9947#[rustc_legacy_const_generics(4)]
9948pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9949 a: __m512d,
9950 k: __mmask8,
9951 b: __m512d,
9952 c: __m512d,
9953) -> __m512d {
9954 unsafe {
9955 static_assert_rounding!(ROUNDING);
9956 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9957 simd_select_bitmask(m:k, yes:r, no:a)
9958 }
9959}
9960
9961/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9962///
9963/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9964/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9965/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9966/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9967/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9968/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9969///
9970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9971#[inline]
9972#[target_feature(enable = "avx512f")]
9973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9974#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9975#[rustc_legacy_const_generics(4)]
9976pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9977 k: __mmask8,
9978 a: __m512d,
9979 b: __m512d,
9980 c: __m512d,
9981) -> __m512d {
9982 unsafe {
9983 static_assert_rounding!(ROUNDING);
9984 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9985 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9986 }
9987}
9988
9989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9990///
9991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9997///
9998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9999#[inline]
10000#[target_feature(enable = "avx512f")]
10001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10002#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
10003#[rustc_legacy_const_generics(4)]
10004pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
10005 a: __m512d,
10006 b: __m512d,
10007 c: __m512d,
10008 k: __mmask8,
10009) -> __m512d {
10010 unsafe {
10011 static_assert_rounding!(ROUNDING);
10012 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
10013 simd_select_bitmask(m:k, yes:r, no:c)
10014 }
10015}
10016
10017/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
10018/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10019///
10020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
10021#[inline]
10022#[target_feature(enable = "avx512f")]
10023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10024#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10025#[rustc_legacy_const_generics(2)]
10026pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
10027 unsafe {
10028 static_assert_sae!(SAE);
10029 let a: Simd = a.as_f32x16();
10030 let b: Simd = b.as_f32x16();
10031 let r: Simd = vmaxps(a, b, SAE);
10032 transmute(src:r)
10033 }
10034}
10035
10036/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10037/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10038///
10039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
10040#[inline]
10041#[target_feature(enable = "avx512f")]
10042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10043#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10044#[rustc_legacy_const_generics(4)]
10045pub fn _mm512_mask_max_round_ps<const SAE: i32>(
10046 src: __m512,
10047 k: __mmask16,
10048 a: __m512,
10049 b: __m512,
10050) -> __m512 {
10051 unsafe {
10052 static_assert_sae!(SAE);
10053 let a: Simd = a.as_f32x16();
10054 let b: Simd = b.as_f32x16();
10055 let r: Simd = vmaxps(a, b, SAE);
10056 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
10057 }
10058}
10059
10060/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10062///
10063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
10064#[inline]
10065#[target_feature(enable = "avx512f")]
10066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10067#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10068#[rustc_legacy_const_generics(3)]
10069pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
10070 unsafe {
10071 static_assert_sae!(SAE);
10072 let a: Simd = a.as_f32x16();
10073 let b: Simd = b.as_f32x16();
10074 let r: Simd = vmaxps(a, b, SAE);
10075 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
10076 }
10077}
10078
10079/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
10080/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10081///
10082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
10083#[inline]
10084#[target_feature(enable = "avx512f")]
10085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10086#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10087#[rustc_legacy_const_generics(2)]
10088pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
10089 unsafe {
10090 static_assert_sae!(SAE);
10091 let a: Simd = a.as_f64x8();
10092 let b: Simd = b.as_f64x8();
10093 let r: Simd = vmaxpd(a, b, SAE);
10094 transmute(src:r)
10095 }
10096}
10097
10098/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10099/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10100///
10101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
10102#[inline]
10103#[target_feature(enable = "avx512f")]
10104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10105#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10106#[rustc_legacy_const_generics(4)]
10107pub fn _mm512_mask_max_round_pd<const SAE: i32>(
10108 src: __m512d,
10109 k: __mmask8,
10110 a: __m512d,
10111 b: __m512d,
10112) -> __m512d {
10113 unsafe {
10114 static_assert_sae!(SAE);
10115 let a: Simd = a.as_f64x8();
10116 let b: Simd = b.as_f64x8();
10117 let r: Simd = vmaxpd(a, b, SAE);
10118 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
10119 }
10120}
10121
10122/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10123/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10124///
10125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
10126#[inline]
10127#[target_feature(enable = "avx512f")]
10128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10129#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10130#[rustc_legacy_const_generics(3)]
10131pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
10132 unsafe {
10133 static_assert_sae!(SAE);
10134 let a: Simd = a.as_f64x8();
10135 let b: Simd = b.as_f64x8();
10136 let r: Simd = vmaxpd(a, b, SAE);
10137 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
10138 }
10139}
10140
10141/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
10142/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10143///
10144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
10145#[inline]
10146#[target_feature(enable = "avx512f")]
10147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10148#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10149#[rustc_legacy_const_generics(2)]
10150pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
10151 unsafe {
10152 static_assert_sae!(SAE);
10153 let a: Simd = a.as_f32x16();
10154 let b: Simd = b.as_f32x16();
10155 let r: Simd = vminps(a, b, SAE);
10156 transmute(src:r)
10157 }
10158}
10159
10160/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162///
10163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
10164#[inline]
10165#[target_feature(enable = "avx512f")]
10166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10167#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10168#[rustc_legacy_const_generics(4)]
10169pub fn _mm512_mask_min_round_ps<const SAE: i32>(
10170 src: __m512,
10171 k: __mmask16,
10172 a: __m512,
10173 b: __m512,
10174) -> __m512 {
10175 unsafe {
10176 static_assert_sae!(SAE);
10177 let a: Simd = a.as_f32x16();
10178 let b: Simd = b.as_f32x16();
10179 let r: Simd = vminps(a, b, SAE);
10180 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
10181 }
10182}
10183
10184/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10185/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10186///
10187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
10188#[inline]
10189#[target_feature(enable = "avx512f")]
10190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10191#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10192#[rustc_legacy_const_generics(3)]
10193pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
10194 unsafe {
10195 static_assert_sae!(SAE);
10196 let a: Simd = a.as_f32x16();
10197 let b: Simd = b.as_f32x16();
10198 let r: Simd = vminps(a, b, SAE);
10199 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
10200 }
10201}
10202
10203/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
10204/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10205///
10206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
10207#[inline]
10208#[target_feature(enable = "avx512f")]
10209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10210#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10211#[rustc_legacy_const_generics(2)]
10212pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
10213 unsafe {
10214 static_assert_sae!(SAE);
10215 let a: Simd = a.as_f64x8();
10216 let b: Simd = b.as_f64x8();
10217 let r: Simd = vminpd(a, b, SAE);
10218 transmute(src:r)
10219 }
10220}
10221
10222/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10223/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10224///
10225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
10226#[inline]
10227#[target_feature(enable = "avx512f")]
10228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10229#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10230#[rustc_legacy_const_generics(4)]
10231pub fn _mm512_mask_min_round_pd<const SAE: i32>(
10232 src: __m512d,
10233 k: __mmask8,
10234 a: __m512d,
10235 b: __m512d,
10236) -> __m512d {
10237 unsafe {
10238 static_assert_sae!(SAE);
10239 let a: Simd = a.as_f64x8();
10240 let b: Simd = b.as_f64x8();
10241 let r: Simd = vminpd(a, b, SAE);
10242 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
10243 }
10244}
10245
10246/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10247/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10248///
10249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
10250#[inline]
10251#[target_feature(enable = "avx512f")]
10252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10253#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10254#[rustc_legacy_const_generics(3)]
10255pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
10256 unsafe {
10257 static_assert_sae!(SAE);
10258 let a: Simd = a.as_f64x8();
10259 let b: Simd = b.as_f64x8();
10260 let r: Simd = vminpd(a, b, SAE);
10261 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
10262 }
10263}
10264
10265/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10266/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10267///
10268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
10269#[inline]
10270#[target_feature(enable = "avx512f")]
10271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10272#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10273#[rustc_legacy_const_generics(1)]
10274pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
10275 unsafe {
10276 static_assert_sae!(SAE);
10277 let a: Simd = a.as_f32x16();
10278 let r: Simd = vgetexpps(a, src:f32x16::ZERO, m:0b11111111_11111111, SAE);
10279 transmute(src:r)
10280 }
10281}
10282
10283/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10284/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10285///
10286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10287#[inline]
10288#[target_feature(enable = "avx512f")]
10289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10290#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10291#[rustc_legacy_const_generics(3)]
10292pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10293 unsafe {
10294 static_assert_sae!(SAE);
10295 let a: Simd = a.as_f32x16();
10296 let src: Simd = src.as_f32x16();
10297 let r: Simd = vgetexpps(a, src, m:k, SAE);
10298 transmute(src:r)
10299 }
10300}
10301
10302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10303/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10304///
10305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10306#[inline]
10307#[target_feature(enable = "avx512f")]
10308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10309#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10310#[rustc_legacy_const_generics(2)]
10311pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10312 unsafe {
10313 static_assert_sae!(SAE);
10314 let a: Simd = a.as_f32x16();
10315 let r: Simd = vgetexpps(a, src:f32x16::ZERO, m:k, SAE);
10316 transmute(src:r)
10317 }
10318}
10319
10320/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10322///
10323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10324#[inline]
10325#[target_feature(enable = "avx512f")]
10326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10327#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10328#[rustc_legacy_const_generics(1)]
10329pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10330 unsafe {
10331 static_assert_sae!(SAE);
10332 let a: Simd = a.as_f64x8();
10333 let r: Simd = vgetexppd(a, src:f64x8::ZERO, m:0b11111111, SAE);
10334 transmute(src:r)
10335 }
10336}
10337
10338/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10339/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10340///
10341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10342#[inline]
10343#[target_feature(enable = "avx512f")]
10344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10345#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10346#[rustc_legacy_const_generics(3)]
10347pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10348 src: __m512d,
10349 k: __mmask8,
10350 a: __m512d,
10351) -> __m512d {
10352 unsafe {
10353 static_assert_sae!(SAE);
10354 let a: Simd = a.as_f64x8();
10355 let src: Simd = src.as_f64x8();
10356 let r: Simd = vgetexppd(a, src, m:k, SAE);
10357 transmute(src:r)
10358 }
10359}
10360
10361/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10362/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10363///
10364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10365#[inline]
10366#[target_feature(enable = "avx512f")]
10367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10368#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10369#[rustc_legacy_const_generics(2)]
10370pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10371 unsafe {
10372 static_assert_sae!(SAE);
10373 let a: Simd = a.as_f64x8();
10374 let r: Simd = vgetexppd(a, src:f64x8::ZERO, m:k, SAE);
10375 transmute(src:r)
10376 }
10377}
10378
10379/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10380/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10381/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10382/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10383/// * [`_MM_FROUND_TO_POS_INF`] : round up
10384/// * [`_MM_FROUND_TO_ZERO`] : truncate
10385/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10386///
10387/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
10389#[inline]
10390#[target_feature(enable = "avx512f")]
10391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10392#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10393#[rustc_legacy_const_generics(1, 2)]
10394pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10395 unsafe {
10396 static_assert_uimm_bits!(IMM8, 8);
10397 static_assert_mantissas_sae!(SAE);
10398 let a: Simd = a.as_f32x16();
10399 let r: Simd = vrndscaleps(a, IMM8, src:f32x16::ZERO, mask:0b11111111_11111111, SAE);
10400 transmute(src:r)
10401 }
10402}
10403
10404/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10405/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10406/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10407/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10408/// * [`_MM_FROUND_TO_POS_INF`] : round up
10409/// * [`_MM_FROUND_TO_ZERO`] : truncate
10410/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10411///
10412/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10414#[inline]
10415#[target_feature(enable = "avx512f")]
10416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10417#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10418#[rustc_legacy_const_generics(3, 4)]
10419pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10420 src: __m512,
10421 k: __mmask16,
10422 a: __m512,
10423) -> __m512 {
10424 unsafe {
10425 static_assert_uimm_bits!(IMM8, 8);
10426 static_assert_mantissas_sae!(SAE);
10427 let a: Simd = a.as_f32x16();
10428 let src: Simd = src.as_f32x16();
10429 let r: Simd = vrndscaleps(a, IMM8, src, mask:k, SAE);
10430 transmute(src:r)
10431 }
10432}
10433
10434/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10435/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10436/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10437/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10438/// * [`_MM_FROUND_TO_POS_INF`] : round up
10439/// * [`_MM_FROUND_TO_ZERO`] : truncate
10440/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10441///
10442/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10444#[inline]
10445#[target_feature(enable = "avx512f")]
10446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10447#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10448#[rustc_legacy_const_generics(2, 3)]
10449pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10450 k: __mmask16,
10451 a: __m512,
10452) -> __m512 {
10453 unsafe {
10454 static_assert_uimm_bits!(IMM8, 8);
10455 static_assert_mantissas_sae!(SAE);
10456 let a: Simd = a.as_f32x16();
10457 let r: Simd = vrndscaleps(a, IMM8, src:f32x16::ZERO, mask:k, SAE);
10458 transmute(src:r)
10459 }
10460}
10461
10462/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10463/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10464/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10465/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10466/// * [`_MM_FROUND_TO_POS_INF`] : round up
10467/// * [`_MM_FROUND_TO_ZERO`] : truncate
10468/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10469///
10470/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10472#[inline]
10473#[target_feature(enable = "avx512f")]
10474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10475#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10476#[rustc_legacy_const_generics(1, 2)]
10477pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10478 unsafe {
10479 static_assert_uimm_bits!(IMM8, 8);
10480 static_assert_mantissas_sae!(SAE);
10481 let a: Simd = a.as_f64x8();
10482 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:0b11111111, SAE);
10483 transmute(src:r)
10484 }
10485}
10486
10487/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10488/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10489/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10490/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10491/// * [`_MM_FROUND_TO_POS_INF`] : round up
10492/// * [`_MM_FROUND_TO_ZERO`] : truncate
10493/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10494///
10495/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10497#[inline]
10498#[target_feature(enable = "avx512f")]
10499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10500#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10501#[rustc_legacy_const_generics(3, 4)]
10502pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10503 src: __m512d,
10504 k: __mmask8,
10505 a: __m512d,
10506) -> __m512d {
10507 unsafe {
10508 static_assert_uimm_bits!(IMM8, 8);
10509 static_assert_mantissas_sae!(SAE);
10510 let a: Simd = a.as_f64x8();
10511 let src: Simd = src.as_f64x8();
10512 let r: Simd = vrndscalepd(a, IMM8, src, mask:k, SAE);
10513 transmute(src:r)
10514 }
10515}
10516
10517/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10518/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10519/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10520/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10521/// * [`_MM_FROUND_TO_POS_INF`] : round up
10522/// * [`_MM_FROUND_TO_ZERO`] : truncate
10523/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10524///
10525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10527#[inline]
10528#[target_feature(enable = "avx512f")]
10529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10530#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10531#[rustc_legacy_const_generics(2, 3)]
10532pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10533 k: __mmask8,
10534 a: __m512d,
10535) -> __m512d {
10536 unsafe {
10537 static_assert_uimm_bits!(IMM8, 8);
10538 static_assert_mantissas_sae!(SAE);
10539 let a: Simd = a.as_f64x8();
10540 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:k, SAE);
10541 transmute(src:r)
10542 }
10543}
10544
10545/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10546///
10547/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10553///
10554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
10555#[inline]
10556#[target_feature(enable = "avx512f")]
10557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10558#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10559#[rustc_legacy_const_generics(2)]
10560pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10561 unsafe {
10562 static_assert_rounding!(ROUNDING);
10563 let a: Simd = a.as_f32x16();
10564 let b: Simd = b.as_f32x16();
10565 let r: Simd = vscalefps(a, b, src:f32x16::ZERO, mask:0b11111111_11111111, ROUNDING);
10566 transmute(src:r)
10567 }
10568}
10569
10570/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10571///
10572/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10573/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10574/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10575/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10576/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10578///
10579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10580#[inline]
10581#[target_feature(enable = "avx512f")]
10582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10583#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10584#[rustc_legacy_const_generics(4)]
10585pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10586 src: __m512,
10587 k: __mmask16,
10588 a: __m512,
10589 b: __m512,
10590) -> __m512 {
10591 unsafe {
10592 static_assert_rounding!(ROUNDING);
10593 let a: Simd = a.as_f32x16();
10594 let b: Simd = b.as_f32x16();
10595 let src: Simd = src.as_f32x16();
10596 let r: Simd = vscalefps(a, b, src, mask:k, ROUNDING);
10597 transmute(src:r)
10598 }
10599}
10600
10601/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10602///
10603/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10609///
10610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10611#[inline]
10612#[target_feature(enable = "avx512f")]
10613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10614#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10615#[rustc_legacy_const_generics(3)]
10616pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10617 k: __mmask16,
10618 a: __m512,
10619 b: __m512,
10620) -> __m512 {
10621 unsafe {
10622 static_assert_rounding!(ROUNDING);
10623 let a: Simd = a.as_f32x16();
10624 let b: Simd = b.as_f32x16();
10625 let r: Simd = vscalefps(a, b, src:f32x16::ZERO, mask:k, ROUNDING);
10626 transmute(src:r)
10627 }
10628}
10629
10630/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10631///
10632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10638///
10639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10640#[inline]
10641#[target_feature(enable = "avx512f")]
10642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10643#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10644#[rustc_legacy_const_generics(2)]
10645pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10646 unsafe {
10647 static_assert_rounding!(ROUNDING);
10648 let a: Simd = a.as_f64x8();
10649 let b: Simd = b.as_f64x8();
10650 let r: Simd = vscalefpd(a, b, src:f64x8::ZERO, mask:0b11111111, ROUNDING);
10651 transmute(src:r)
10652 }
10653}
10654
10655/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10656///
10657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10663///
10664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10665#[inline]
10666#[target_feature(enable = "avx512f")]
10667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10668#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10669#[rustc_legacy_const_generics(4)]
10670pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10671 src: __m512d,
10672 k: __mmask8,
10673 a: __m512d,
10674 b: __m512d,
10675) -> __m512d {
10676 unsafe {
10677 static_assert_rounding!(ROUNDING);
10678 let a: Simd = a.as_f64x8();
10679 let b: Simd = b.as_f64x8();
10680 let src: Simd = src.as_f64x8();
10681 let r: Simd = vscalefpd(a, b, src, mask:k, ROUNDING);
10682 transmute(src:r)
10683 }
10684}
10685
10686/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10687///
10688/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10689/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10690/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10691/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10692/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10693/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10694///
10695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10696#[inline]
10697#[target_feature(enable = "avx512f")]
10698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10699#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10700#[rustc_legacy_const_generics(3)]
10701pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10702 k: __mmask8,
10703 a: __m512d,
10704 b: __m512d,
10705) -> __m512d {
10706 unsafe {
10707 static_assert_rounding!(ROUNDING);
10708 let a: Simd = a.as_f64x8();
10709 let b: Simd = b.as_f64x8();
10710 let r: Simd = vscalefpd(a, b, src:f64x8::ZERO, mask:k, ROUNDING);
10711 transmute(src:r)
10712 }
10713}
10714
10715/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10716///
10717/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
10719#[inline]
10720#[target_feature(enable = "avx512f")]
10721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10722#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10723#[rustc_legacy_const_generics(3, 4)]
10724pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10725 a: __m512,
10726 b: __m512,
10727 c: __m512i,
10728) -> __m512 {
10729 unsafe {
10730 static_assert_uimm_bits!(IMM8, 8);
10731 static_assert_mantissas_sae!(SAE);
10732 let a: Simd = a.as_f32x16();
10733 let b: Simd = b.as_f32x16();
10734 let c: Simd = c.as_i32x16();
10735 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:0b11111111_11111111, SAE);
10736 transmute(src:r)
10737 }
10738}
10739
10740/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10741///
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10744#[inline]
10745#[target_feature(enable = "avx512f")]
10746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10747#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10748#[rustc_legacy_const_generics(4, 5)]
10749pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10750 a: __m512,
10751 k: __mmask16,
10752 b: __m512,
10753 c: __m512i,
10754) -> __m512 {
10755 unsafe {
10756 static_assert_uimm_bits!(IMM8, 8);
10757 static_assert_mantissas_sae!(SAE);
10758 let a: Simd = a.as_f32x16();
10759 let b: Simd = b.as_f32x16();
10760 let c: Simd = c.as_i32x16();
10761 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:k, SAE);
10762 transmute(src:r)
10763 }
10764}
10765
10766/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10767///
10768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10770#[inline]
10771#[target_feature(enable = "avx512f")]
10772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10773#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10774#[rustc_legacy_const_generics(4, 5)]
10775pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10776 k: __mmask16,
10777 a: __m512,
10778 b: __m512,
10779 c: __m512i,
10780) -> __m512 {
10781 unsafe {
10782 static_assert_uimm_bits!(IMM8, 8);
10783 static_assert_mantissas_sae!(SAE);
10784 let a: Simd = a.as_f32x16();
10785 let b: Simd = b.as_f32x16();
10786 let c: Simd = c.as_i32x16();
10787 let r: Simd = vfixupimmpsz(a, b, c, IMM8, mask:k, SAE);
10788 transmute(src:r)
10789 }
10790}
10791
10792/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10793///
10794/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10796#[inline]
10797#[target_feature(enable = "avx512f")]
10798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10799#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10800#[rustc_legacy_const_generics(3, 4)]
10801pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10802 a: __m512d,
10803 b: __m512d,
10804 c: __m512i,
10805) -> __m512d {
10806 unsafe {
10807 static_assert_uimm_bits!(IMM8, 8);
10808 static_assert_mantissas_sae!(SAE);
10809 let a: Simd = a.as_f64x8();
10810 let b: Simd = b.as_f64x8();
10811 let c: Simd = c.as_i64x8();
10812 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:0b11111111, SAE);
10813 transmute(src:r)
10814 }
10815}
10816
10817/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10818///
10819/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10821#[inline]
10822#[target_feature(enable = "avx512f")]
10823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10824#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10825#[rustc_legacy_const_generics(4, 5)]
10826pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10827 a: __m512d,
10828 k: __mmask8,
10829 b: __m512d,
10830 c: __m512i,
10831) -> __m512d {
10832 unsafe {
10833 static_assert_uimm_bits!(IMM8, 8);
10834 static_assert_mantissas_sae!(SAE);
10835 let a: Simd = a.as_f64x8();
10836 let b: Simd = b.as_f64x8();
10837 let c: Simd = c.as_i64x8();
10838 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:k, SAE);
10839 transmute(src:r)
10840 }
10841}
10842
10843/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10844///
10845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10847#[inline]
10848#[target_feature(enable = "avx512f")]
10849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10850#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10851#[rustc_legacy_const_generics(4, 5)]
10852pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10853 k: __mmask8,
10854 a: __m512d,
10855 b: __m512d,
10856 c: __m512i,
10857) -> __m512d {
10858 unsafe {
10859 static_assert_uimm_bits!(IMM8, 8);
10860 static_assert_mantissas_sae!(SAE);
10861 let a: Simd = a.as_f64x8();
10862 let b: Simd = b.as_f64x8();
10863 let c: Simd = c.as_i64x8();
10864 let r: Simd = vfixupimmpdz(a, b, c, IMM8, mask:k, SAE);
10865 transmute(src:r)
10866 }
10867}
10868
10869/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10870/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10871/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10872/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10873/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10874/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10875/// The sign is determined by sc which can take the following values:\
10876/// _MM_MANT_SIGN_src // sign = sign(src)\
10877/// _MM_MANT_SIGN_zero // sign = 0\
10878/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10879/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10880///
10881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
10882#[inline]
10883#[target_feature(enable = "avx512f")]
10884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10885#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10886#[rustc_legacy_const_generics(1, 2, 3)]
10887pub fn _mm512_getmant_round_ps<
10888 const NORM: _MM_MANTISSA_NORM_ENUM,
10889 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10890 const SAE: i32,
10891>(
10892 a: __m512,
10893) -> __m512 {
10894 unsafe {
10895 static_assert_uimm_bits!(NORM, 4);
10896 static_assert_uimm_bits!(SIGN, 2);
10897 static_assert_mantissas_sae!(SAE);
10898 let a: Simd = a.as_f32x16();
10899 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src:f32x16::ZERO, m:0b11111111_11111111, SAE);
10900 transmute(src:r)
10901 }
10902}
10903
10904/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10905/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10906/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10907/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10908/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10909/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10910/// The sign is determined by sc which can take the following values:\
10911/// _MM_MANT_SIGN_src // sign = sign(src)\
10912/// _MM_MANT_SIGN_zero // sign = 0\
10913/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10914/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10915///
10916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10917#[inline]
10918#[target_feature(enable = "avx512f")]
10919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10920#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10921#[rustc_legacy_const_generics(3, 4, 5)]
10922pub fn _mm512_mask_getmant_round_ps<
10923 const NORM: _MM_MANTISSA_NORM_ENUM,
10924 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10925 const SAE: i32,
10926>(
10927 src: __m512,
10928 k: __mmask16,
10929 a: __m512,
10930) -> __m512 {
10931 unsafe {
10932 static_assert_uimm_bits!(NORM, 4);
10933 static_assert_uimm_bits!(SIGN, 2);
10934 static_assert_mantissas_sae!(SAE);
10935 let a: Simd = a.as_f32x16();
10936 let src: Simd = src.as_f32x16();
10937 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src, m:k, SAE);
10938 transmute(src:r)
10939 }
10940}
10941
10942/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10943/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10944/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10945/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10946/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10947/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10948/// The sign is determined by sc which can take the following values:\
10949/// _MM_MANT_SIGN_src // sign = sign(src)\
10950/// _MM_MANT_SIGN_zero // sign = 0\
10951/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10952/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10958#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10959#[rustc_legacy_const_generics(2, 3, 4)]
10960pub fn _mm512_maskz_getmant_round_ps<
10961 const NORM: _MM_MANTISSA_NORM_ENUM,
10962 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10963 const SAE: i32,
10964>(
10965 k: __mmask16,
10966 a: __m512,
10967) -> __m512 {
10968 unsafe {
10969 static_assert_uimm_bits!(NORM, 4);
10970 static_assert_uimm_bits!(SIGN, 2);
10971 static_assert_mantissas_sae!(SAE);
10972 let a: Simd = a.as_f32x16();
10973 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src:f32x16::ZERO, m:k, SAE);
10974 transmute(src:r)
10975 }
10976}
10977
10978/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10979/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10980/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10981/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10982/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10983/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10984/// The sign is determined by sc which can take the following values:\
10985/// _MM_MANT_SIGN_src // sign = sign(src)\
10986/// _MM_MANT_SIGN_zero // sign = 0\
10987/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10988/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10989///
10990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10991#[inline]
10992#[target_feature(enable = "avx512f")]
10993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10994#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10995#[rustc_legacy_const_generics(1, 2, 3)]
10996pub fn _mm512_getmant_round_pd<
10997 const NORM: _MM_MANTISSA_NORM_ENUM,
10998 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10999 const SAE: i32,
11000>(
11001 a: __m512d,
11002) -> __m512d {
11003 unsafe {
11004 static_assert_uimm_bits!(NORM, 4);
11005 static_assert_uimm_bits!(SIGN, 2);
11006 static_assert_mantissas_sae!(SAE);
11007 let a: Simd = a.as_f64x8();
11008 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src:f64x8::ZERO, m:0b11111111, SAE);
11009 transmute(src:r)
11010 }
11011}
11012
11013/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11014/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11015/// _MM_MANT_NORM_1_2 // interval [1, 2)\
11016/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
11017/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
11018/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11019/// The sign is determined by sc which can take the following values:\
11020/// _MM_MANT_SIGN_src // sign = sign(src)\
11021/// _MM_MANT_SIGN_zero // sign = 0\
11022/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
11023/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11024///
11025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
11026#[inline]
11027#[target_feature(enable = "avx512f")]
11028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11029#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11030#[rustc_legacy_const_generics(3, 4, 5)]
11031pub fn _mm512_mask_getmant_round_pd<
11032 const NORM: _MM_MANTISSA_NORM_ENUM,
11033 const SIGN: _MM_MANTISSA_SIGN_ENUM,
11034 const SAE: i32,
11035>(
11036 src: __m512d,
11037 k: __mmask8,
11038 a: __m512d,
11039) -> __m512d {
11040 unsafe {
11041 static_assert_uimm_bits!(NORM, 4);
11042 static_assert_uimm_bits!(SIGN, 2);
11043 static_assert_mantissas_sae!(SAE);
11044 let a: Simd = a.as_f64x8();
11045 let src: Simd = src.as_f64x8();
11046 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src, m:k, SAE);
11047 transmute(src:r)
11048 }
11049}
11050
11051/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11052/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11053/// _MM_MANT_NORM_1_2 // interval [1, 2)\
11054/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
11055/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
11056/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11057/// The sign is determined by sc which can take the following values:\
11058/// _MM_MANT_SIGN_src // sign = sign(src)\
11059/// _MM_MANT_SIGN_zero // sign = 0\
11060/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
11061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11062///
11063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
11064#[inline]
11065#[target_feature(enable = "avx512f")]
11066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11067#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11068#[rustc_legacy_const_generics(2, 3, 4)]
11069pub fn _mm512_maskz_getmant_round_pd<
11070 const NORM: _MM_MANTISSA_NORM_ENUM,
11071 const SIGN: _MM_MANTISSA_SIGN_ENUM,
11072 const SAE: i32,
11073>(
11074 k: __mmask8,
11075 a: __m512d,
11076) -> __m512d {
11077 unsafe {
11078 static_assert_uimm_bits!(NORM, 4);
11079 static_assert_uimm_bits!(SIGN, 2);
11080 static_assert_mantissas_sae!(SAE);
11081 let a: Simd = a.as_f64x8();
11082 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src:f64x8::ZERO, m:k, SAE);
11083 transmute(src:r)
11084 }
11085}
11086
11087/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11088///
11089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
11090#[inline]
11091#[target_feature(enable = "avx512f")]
11092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11093#[cfg_attr(test, assert_instr(vcvtps2dq))]
11094pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
11095 unsafe {
11096 transmute(src:vcvtps2dq(
11097 a.as_f32x16(),
11098 src:i32x16::ZERO,
11099 mask:0b11111111_11111111,
11100 _MM_FROUND_CUR_DIRECTION,
11101 ))
11102 }
11103}
11104
11105/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11106///
11107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
11108#[inline]
11109#[target_feature(enable = "avx512f")]
11110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11111#[cfg_attr(test, assert_instr(vcvtps2dq))]
11112pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
11113 unsafe {
11114 transmute(src:vcvtps2dq(
11115 a.as_f32x16(),
11116 src.as_i32x16(),
11117 mask:k,
11118 _MM_FROUND_CUR_DIRECTION,
11119 ))
11120 }
11121}
11122
11123/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11124///
11125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
11126#[inline]
11127#[target_feature(enable = "avx512f")]
11128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11129#[cfg_attr(test, assert_instr(vcvtps2dq))]
11130pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
11131 unsafe {
11132 transmute(src:vcvtps2dq(
11133 a.as_f32x16(),
11134 src:i32x16::ZERO,
11135 mask:k,
11136 _MM_FROUND_CUR_DIRECTION,
11137 ))
11138 }
11139}
11140
11141/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11142///
11143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
11144#[inline]
11145#[target_feature(enable = "avx512f,avx512vl")]
11146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11147#[cfg_attr(test, assert_instr(vcvtps2dq))]
11148pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11149 unsafe {
11150 let convert: __m256i = _mm256_cvtps_epi32(a);
11151 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x8(), no:src.as_i32x8()))
11152 }
11153}
11154
11155/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11156///
11157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
11158#[inline]
11159#[target_feature(enable = "avx512f,avx512vl")]
11160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11161#[cfg_attr(test, assert_instr(vcvtps2dq))]
11162pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
11163 unsafe {
11164 let convert: __m256i = _mm256_cvtps_epi32(a);
11165 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x8(), no:i32x8::ZERO))
11166 }
11167}
11168
11169/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11170///
11171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
11172#[inline]
11173#[target_feature(enable = "avx512f,avx512vl")]
11174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11175#[cfg_attr(test, assert_instr(vcvtps2dq))]
11176pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11177 unsafe {
11178 let convert: __m128i = _mm_cvtps_epi32(a);
11179 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11180 }
11181}
11182
11183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11184///
11185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
11186#[inline]
11187#[target_feature(enable = "avx512f,avx512vl")]
11188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11189#[cfg_attr(test, assert_instr(vcvtps2dq))]
11190pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
11191 unsafe {
11192 let convert: __m128i = _mm_cvtps_epi32(a);
11193 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11194 }
11195}
11196
11197/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11198///
11199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
11200#[inline]
11201#[target_feature(enable = "avx512f")]
11202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11203#[cfg_attr(test, assert_instr(vcvtps2udq))]
11204pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
11205 unsafe {
11206 transmute(src:vcvtps2udq(
11207 a.as_f32x16(),
11208 src:u32x16::ZERO,
11209 mask:0b11111111_11111111,
11210 _MM_FROUND_CUR_DIRECTION,
11211 ))
11212 }
11213}
11214
11215/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11216///
11217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
11218#[inline]
11219#[target_feature(enable = "avx512f")]
11220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11221#[cfg_attr(test, assert_instr(vcvtps2udq))]
11222pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
11223 unsafe {
11224 transmute(src:vcvtps2udq(
11225 a.as_f32x16(),
11226 src.as_u32x16(),
11227 mask:k,
11228 _MM_FROUND_CUR_DIRECTION,
11229 ))
11230 }
11231}
11232
11233/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11234///
11235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
11236#[inline]
11237#[target_feature(enable = "avx512f")]
11238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11239#[cfg_attr(test, assert_instr(vcvtps2udq))]
11240pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
11241 unsafe {
11242 transmute(src:vcvtps2udq(
11243 a.as_f32x16(),
11244 src:u32x16::ZERO,
11245 mask:k,
11246 _MM_FROUND_CUR_DIRECTION,
11247 ))
11248 }
11249}
11250
11251/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11252///
11253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
11254#[inline]
11255#[target_feature(enable = "avx512f,avx512vl")]
11256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11257#[cfg_attr(test, assert_instr(vcvtps2udq))]
11258pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
11259 unsafe { transmute(src:vcvtps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:0b11111111)) }
11260}
11261
11262/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11263///
11264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
11265#[inline]
11266#[target_feature(enable = "avx512f,avx512vl")]
11267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11268#[cfg_attr(test, assert_instr(vcvtps2udq))]
11269pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11270 unsafe { transmute(src:vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), mask:k)) }
11271}
11272
11273/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11274///
11275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
11276#[inline]
11277#[target_feature(enable = "avx512f,avx512vl")]
11278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11279#[cfg_attr(test, assert_instr(vcvtps2udq))]
11280pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11281 unsafe { transmute(src:vcvtps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:k)) }
11282}
11283
11284/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11285///
11286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11287#[inline]
11288#[target_feature(enable = "avx512f,avx512vl")]
11289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11290#[cfg_attr(test, assert_instr(vcvtps2udq))]
11291pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11292 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:0b11111111)) }
11293}
11294
11295/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11296///
11297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11298#[inline]
11299#[target_feature(enable = "avx512f,avx512vl")]
11300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11301#[cfg_attr(test, assert_instr(vcvtps2udq))]
11302pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11303 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
11304}
11305
11306/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11307///
11308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11309#[inline]
11310#[target_feature(enable = "avx512f,avx512vl")]
11311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11312#[cfg_attr(test, assert_instr(vcvtps2udq))]
11313pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11314 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
11315}
11316
11317/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11318///
11319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11320#[inline]
11321#[target_feature(enable = "avx512f")]
11322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11323#[cfg_attr(test, assert_instr(vcvtps2pd))]
11324pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11325 unsafe {
11326 transmute(src:vcvtps2pd(
11327 a.as_f32x8(),
11328 src:f64x8::ZERO,
11329 mask:0b11111111,
11330 _MM_FROUND_CUR_DIRECTION,
11331 ))
11332 }
11333}
11334
11335/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11336///
11337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11338#[inline]
11339#[target_feature(enable = "avx512f")]
11340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11341#[cfg_attr(test, assert_instr(vcvtps2pd))]
11342pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11343 unsafe {
11344 transmute(src:vcvtps2pd(
11345 a.as_f32x8(),
11346 src.as_f64x8(),
11347 mask:k,
11348 _MM_FROUND_CUR_DIRECTION,
11349 ))
11350 }
11351}
11352
11353/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11354///
11355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11356#[inline]
11357#[target_feature(enable = "avx512f")]
11358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11359#[cfg_attr(test, assert_instr(vcvtps2pd))]
11360pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11361 unsafe {
11362 transmute(src:vcvtps2pd(
11363 a.as_f32x8(),
11364 src:f64x8::ZERO,
11365 mask:k,
11366 _MM_FROUND_CUR_DIRECTION,
11367 ))
11368 }
11369}
11370
11371/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11372///
11373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11374#[inline]
11375#[target_feature(enable = "avx512f")]
11376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11377#[cfg_attr(test, assert_instr(vcvtps2pd))]
11378pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11379 unsafe {
11380 transmute(src:vcvtps2pd(
11381 a:_mm512_castps512_ps256(v2).as_f32x8(),
11382 src:f64x8::ZERO,
11383 mask:0b11111111,
11384 _MM_FROUND_CUR_DIRECTION,
11385 ))
11386 }
11387}
11388
11389/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11390///
11391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11392#[inline]
11393#[target_feature(enable = "avx512f")]
11394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11395#[cfg_attr(test, assert_instr(vcvtps2pd))]
11396pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11397 unsafe {
11398 transmute(src:vcvtps2pd(
11399 a:_mm512_castps512_ps256(v2).as_f32x8(),
11400 src.as_f64x8(),
11401 mask:k,
11402 _MM_FROUND_CUR_DIRECTION,
11403 ))
11404 }
11405}
11406
11407/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11408///
11409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11410#[inline]
11411#[target_feature(enable = "avx512f")]
11412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11413#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11414pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11415 unsafe {
11416 transmute(src:vcvtpd2ps(
11417 a.as_f64x8(),
11418 src:f32x8::ZERO,
11419 mask:0b11111111,
11420 _MM_FROUND_CUR_DIRECTION,
11421 ))
11422 }
11423}
11424
11425/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11426///
11427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11428#[inline]
11429#[target_feature(enable = "avx512f")]
11430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11431#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11432pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11433 unsafe {
11434 transmute(src:vcvtpd2ps(
11435 a.as_f64x8(),
11436 src.as_f32x8(),
11437 mask:k,
11438 _MM_FROUND_CUR_DIRECTION,
11439 ))
11440 }
11441}
11442
11443/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11444///
11445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11446#[inline]
11447#[target_feature(enable = "avx512f")]
11448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11449#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11450pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11451 unsafe {
11452 transmute(src:vcvtpd2ps(
11453 a.as_f64x8(),
11454 src:f32x8::ZERO,
11455 mask:k,
11456 _MM_FROUND_CUR_DIRECTION,
11457 ))
11458 }
11459}
11460
11461/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11462///
11463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11464#[inline]
11465#[target_feature(enable = "avx512f,avx512vl")]
11466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11467#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11468pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11469 unsafe {
11470 let convert: __m128 = _mm256_cvtpd_ps(a);
11471 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
11472 }
11473}
11474
11475/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11476///
11477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11478#[inline]
11479#[target_feature(enable = "avx512f,avx512vl")]
11480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11481#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11482pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11483 unsafe {
11484 let convert: __m128 = _mm256_cvtpd_ps(a);
11485 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11486 }
11487}
11488
11489/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11492#[inline]
11493#[target_feature(enable = "avx512f,avx512vl")]
11494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11497 unsafe { vcvtpd2ps128(a.as_f64x2(), src.as_f32x4(), mask:k).as_m128() }
11498}
11499
11500/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11501///
11502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11503#[inline]
11504#[target_feature(enable = "avx512f,avx512vl")]
11505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11506#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11507pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11508 unsafe {
11509 let convert: __m128 = _mm_cvtpd_ps(a);
11510 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11511 }
11512}
11513
11514/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11515///
11516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11517#[inline]
11518#[target_feature(enable = "avx512f")]
11519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11520#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11521pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11522 unsafe {
11523 transmute(src:vcvtpd2dq(
11524 a.as_f64x8(),
11525 src:i32x8::ZERO,
11526 mask:0b11111111,
11527 _MM_FROUND_CUR_DIRECTION,
11528 ))
11529 }
11530}
11531
11532/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11533///
11534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11535#[inline]
11536#[target_feature(enable = "avx512f")]
11537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11538#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11539pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11540 unsafe {
11541 transmute(src:vcvtpd2dq(
11542 a.as_f64x8(),
11543 src.as_i32x8(),
11544 mask:k,
11545 _MM_FROUND_CUR_DIRECTION,
11546 ))
11547 }
11548}
11549
11550/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11551///
11552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11553#[inline]
11554#[target_feature(enable = "avx512f")]
11555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11556#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11557pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11558 unsafe {
11559 transmute(src:vcvtpd2dq(
11560 a.as_f64x8(),
11561 src:i32x8::ZERO,
11562 mask:k,
11563 _MM_FROUND_CUR_DIRECTION,
11564 ))
11565 }
11566}
11567
11568/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11574#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11575pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11576 unsafe {
11577 let convert: __m128i = _mm256_cvtpd_epi32(a);
11578 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11579 }
11580}
11581
11582/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11589pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11590 unsafe {
11591 let convert: __m128i = _mm256_cvtpd_epi32(a);
11592 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11593 }
11594}
11595
11596/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11602#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11603pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11604 unsafe { vcvtpd2dq128(a.as_f64x2(), src.as_i32x4(), k).as_m128i() }
11605}
11606
11607/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11608///
11609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11610#[inline]
11611#[target_feature(enable = "avx512f,avx512vl")]
11612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11613#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11614pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11615 unsafe {
11616 let convert: __m128i = _mm_cvtpd_epi32(a);
11617 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11618 }
11619}
11620
11621/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11622///
11623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11624#[inline]
11625#[target_feature(enable = "avx512f")]
11626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11627#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11628pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11629 unsafe {
11630 transmute(src:vcvtpd2udq(
11631 a.as_f64x8(),
11632 src:u32x8::ZERO,
11633 mask:0b11111111,
11634 _MM_FROUND_CUR_DIRECTION,
11635 ))
11636 }
11637}
11638
11639/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11645#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11646pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11647 unsafe {
11648 transmute(src:vcvtpd2udq(
11649 a.as_f64x8(),
11650 src.as_u32x8(),
11651 mask:k,
11652 _MM_FROUND_CUR_DIRECTION,
11653 ))
11654 }
11655}
11656
11657/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11658///
11659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11660#[inline]
11661#[target_feature(enable = "avx512f")]
11662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11663#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11664pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11665 unsafe {
11666 transmute(src:vcvtpd2udq(
11667 a.as_f64x8(),
11668 src:u32x8::ZERO,
11669 mask:k,
11670 _MM_FROUND_CUR_DIRECTION,
11671 ))
11672 }
11673}
11674
11675/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11676///
11677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11678#[inline]
11679#[target_feature(enable = "avx512f,avx512vl")]
11680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11681#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11682pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11683 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:0b11111111)) }
11684}
11685
11686/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11687///
11688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11689#[inline]
11690#[target_feature(enable = "avx512f,avx512vl")]
11691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11692#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11693pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11694 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), mask:k)) }
11695}
11696
11697/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11698///
11699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11700#[inline]
11701#[target_feature(enable = "avx512f,avx512vl")]
11702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11703#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11704pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11705 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:k)) }
11706}
11707
11708/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11709///
11710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11711#[inline]
11712#[target_feature(enable = "avx512f,avx512vl")]
11713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11714#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11715pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11716 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:0b11111111)) }
11717}
11718
11719/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11720///
11721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11722#[inline]
11723#[target_feature(enable = "avx512f,avx512vl")]
11724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11725#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11726pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11727 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), mask:k)) }
11728}
11729
11730/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11731///
11732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11733#[inline]
11734#[target_feature(enable = "avx512f,avx512vl")]
11735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11736#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11737pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11738 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:k)) }
11739}
11740
11741/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11742///
11743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
11744#[inline]
11745#[target_feature(enable = "avx512f")]
11746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11747#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11748pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11749 unsafe {
11750 let r: f32x8 = vcvtpd2ps(
11751 a:v2.as_f64x8(),
11752 src:f32x8::ZERO,
11753 mask:0b11111111,
11754 _MM_FROUND_CUR_DIRECTION,
11755 );
11756 simd_shuffle!(
11757 r,
11758 f32x8::ZERO,
11759 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11760 )
11761 }
11762}
11763
11764/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11765///
11766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11767#[inline]
11768#[target_feature(enable = "avx512f")]
11769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11770#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11771pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11772 unsafe {
11773 let r: f32x8 = vcvtpd2ps(
11774 a:v2.as_f64x8(),
11775 src:_mm512_castps512_ps256(src).as_f32x8(),
11776 mask:k,
11777 _MM_FROUND_CUR_DIRECTION,
11778 );
11779 simd_shuffle!(
11780 r,
11781 f32x8::ZERO,
11782 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11783 )
11784 }
11785}
11786
11787/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11788///
11789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
11790#[inline]
11791#[target_feature(enable = "avx512f")]
11792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11793#[cfg_attr(test, assert_instr(vpmovsxbd))]
11794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11795pub const fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11796 unsafe {
11797 let a: Simd = a.as_i8x16();
11798 transmute::<i32x16, _>(src:simd_cast(a))
11799 }
11800}
11801
11802/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11803///
11804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11805#[inline]
11806#[target_feature(enable = "avx512f")]
11807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11808#[cfg_attr(test, assert_instr(vpmovsxbd))]
11809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11810pub const fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11811 unsafe {
11812 let convert: Simd = _mm512_cvtepi8_epi32(a).as_i32x16();
11813 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11814 }
11815}
11816
11817/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11818///
11819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11820#[inline]
11821#[target_feature(enable = "avx512f")]
11822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11823#[cfg_attr(test, assert_instr(vpmovsxbd))]
11824#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11825pub const fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11826 unsafe {
11827 let convert: Simd = _mm512_cvtepi8_epi32(a).as_i32x16();
11828 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11829 }
11830}
11831
11832/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11833///
11834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11835#[inline]
11836#[target_feature(enable = "avx512f,avx512vl")]
11837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11838#[cfg_attr(test, assert_instr(vpmovsxbd))]
11839#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11840pub const fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11841 unsafe {
11842 let convert: Simd = _mm256_cvtepi8_epi32(a).as_i32x8();
11843 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11844 }
11845}
11846
11847/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11848///
11849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11850#[inline]
11851#[target_feature(enable = "avx512f,avx512vl")]
11852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11853#[cfg_attr(test, assert_instr(vpmovsxbd))]
11854#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11855pub const fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11856 unsafe {
11857 let convert: Simd = _mm256_cvtepi8_epi32(a).as_i32x8();
11858 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11859 }
11860}
11861
11862/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11863///
11864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11865#[inline]
11866#[target_feature(enable = "avx512f,avx512vl")]
11867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11868#[cfg_attr(test, assert_instr(vpmovsxbd))]
11869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11870pub const fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11871 unsafe {
11872 let convert: Simd = _mm_cvtepi8_epi32(a).as_i32x4();
11873 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11874 }
11875}
11876
11877/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11878///
11879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11880#[inline]
11881#[target_feature(enable = "avx512f,avx512vl")]
11882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11883#[cfg_attr(test, assert_instr(vpmovsxbd))]
11884#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11885pub const fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11886 unsafe {
11887 let convert: Simd = _mm_cvtepi8_epi32(a).as_i32x4();
11888 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
11889 }
11890}
11891
11892/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
11895#[inline]
11896#[target_feature(enable = "avx512f")]
11897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11898#[cfg_attr(test, assert_instr(vpmovsxbq))]
11899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11900pub const fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11901 unsafe {
11902 let a: Simd = a.as_i8x16();
11903 let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11904 transmute::<i64x8, _>(src:simd_cast(v64))
11905 }
11906}
11907
11908/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11909///
11910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11911#[inline]
11912#[target_feature(enable = "avx512f")]
11913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11914#[cfg_attr(test, assert_instr(vpmovsxbq))]
11915#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11916pub const fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11917 unsafe {
11918 let convert: Simd = _mm512_cvtepi8_epi64(a).as_i64x8();
11919 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
11920 }
11921}
11922
11923/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11924///
11925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11926#[inline]
11927#[target_feature(enable = "avx512f")]
11928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11929#[cfg_attr(test, assert_instr(vpmovsxbq))]
11930#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11931pub const fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11932 unsafe {
11933 let convert: Simd = _mm512_cvtepi8_epi64(a).as_i64x8();
11934 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
11935 }
11936}
11937
11938/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11939///
11940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11941#[inline]
11942#[target_feature(enable = "avx512f,avx512vl")]
11943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11944#[cfg_attr(test, assert_instr(vpmovsxbq))]
11945#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11946pub const fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11947 unsafe {
11948 let convert: Simd = _mm256_cvtepi8_epi64(a).as_i64x4();
11949 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
11950 }
11951}
11952
11953/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11954///
11955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11956#[inline]
11957#[target_feature(enable = "avx512f,avx512vl")]
11958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11959#[cfg_attr(test, assert_instr(vpmovsxbq))]
11960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11961pub const fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11962 unsafe {
11963 let convert: Simd = _mm256_cvtepi8_epi64(a).as_i64x4();
11964 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
11965 }
11966}
11967
11968/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11969///
11970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11971#[inline]
11972#[target_feature(enable = "avx512f,avx512vl")]
11973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11974#[cfg_attr(test, assert_instr(vpmovsxbq))]
11975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11976pub const fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11977 unsafe {
11978 let convert: Simd = _mm_cvtepi8_epi64(a).as_i64x2();
11979 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
11980 }
11981}
11982
11983/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11984///
11985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11986#[inline]
11987#[target_feature(enable = "avx512f,avx512vl")]
11988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11989#[cfg_attr(test, assert_instr(vpmovsxbq))]
11990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11991pub const fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11992 unsafe {
11993 let convert: Simd = _mm_cvtepi8_epi64(a).as_i64x2();
11994 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
11995 }
11996}
11997
11998/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11999///
12000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
12001#[inline]
12002#[target_feature(enable = "avx512f")]
12003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12004#[cfg_attr(test, assert_instr(vpmovzxbd))]
12005#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12006pub const fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
12007 unsafe {
12008 let a: Simd = a.as_u8x16();
12009 transmute::<i32x16, _>(src:simd_cast(a))
12010 }
12011}
12012
12013/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12014///
12015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
12016#[inline]
12017#[target_feature(enable = "avx512f")]
12018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12019#[cfg_attr(test, assert_instr(vpmovzxbd))]
12020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12021pub const fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
12022 unsafe {
12023 let convert: Simd = _mm512_cvtepu8_epi32(a).as_i32x16();
12024 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
12025 }
12026}
12027
12028/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12029///
12030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
12031#[inline]
12032#[target_feature(enable = "avx512f")]
12033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12034#[cfg_attr(test, assert_instr(vpmovzxbd))]
12035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12036pub const fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
12037 unsafe {
12038 let convert: Simd = _mm512_cvtepu8_epi32(a).as_i32x16();
12039 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
12040 }
12041}
12042
12043/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12044///
12045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
12046#[inline]
12047#[target_feature(enable = "avx512f,avx512vl")]
12048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12049#[cfg_attr(test, assert_instr(vpmovzxbd))]
12050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12051pub const fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12052 unsafe {
12053 let convert: Simd = _mm256_cvtepu8_epi32(a).as_i32x8();
12054 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
12055 }
12056}
12057
12058/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12059///
12060/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
12061#[inline]
12062#[target_feature(enable = "avx512f,avx512vl")]
12063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12064#[cfg_attr(test, assert_instr(vpmovzxbd))]
12065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12066pub const fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
12067 unsafe {
12068 let convert: Simd = _mm256_cvtepu8_epi32(a).as_i32x8();
12069 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
12070 }
12071}
12072
12073/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12074///
12075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
12076#[inline]
12077#[target_feature(enable = "avx512f,avx512vl")]
12078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12079#[cfg_attr(test, assert_instr(vpmovzxbd))]
12080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12081pub const fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082 unsafe {
12083 let convert: Simd = _mm_cvtepu8_epi32(a).as_i32x4();
12084 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
12085 }
12086}
12087
12088/// Zero extend packed unsigned 8-bit integers in th elow 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12094#[cfg_attr(test, assert_instr(vpmovzxbd))]
12095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12096pub const fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
12097 unsafe {
12098 let convert: Simd = _mm_cvtepu8_epi32(a).as_i32x4();
12099 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12100 }
12101}
12102
12103/// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed 64-bit integers, and store the results in dst.
12104///
12105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
12106#[inline]
12107#[target_feature(enable = "avx512f")]
12108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12109#[cfg_attr(test, assert_instr(vpmovzxbq))]
12110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12111pub const fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
12112 unsafe {
12113 let a: Simd = a.as_u8x16();
12114 let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
12115 transmute::<i64x8, _>(src:simd_cast(v64))
12116 }
12117}
12118
12119/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12120///
12121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
12122#[inline]
12123#[target_feature(enable = "avx512f")]
12124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12125#[cfg_attr(test, assert_instr(vpmovzxbq))]
12126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12127pub const fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12128 unsafe {
12129 let convert: Simd = _mm512_cvtepu8_epi64(a).as_i64x8();
12130 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12131 }
12132}
12133
12134/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12135///
12136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
12137#[inline]
12138#[target_feature(enable = "avx512f")]
12139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12140#[cfg_attr(test, assert_instr(vpmovzxbq))]
12141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12142pub const fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
12143 unsafe {
12144 let convert: Simd = _mm512_cvtepu8_epi64(a).as_i64x8();
12145 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12146 }
12147}
12148
12149/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12150///
12151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
12152#[inline]
12153#[target_feature(enable = "avx512f,avx512vl")]
12154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12155#[cfg_attr(test, assert_instr(vpmovzxbq))]
12156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12157pub const fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12158 unsafe {
12159 let convert: Simd = _mm256_cvtepu8_epi64(a).as_i64x4();
12160 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12161 }
12162}
12163
12164/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12165///
12166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
12167#[inline]
12168#[target_feature(enable = "avx512f,avx512vl")]
12169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12170#[cfg_attr(test, assert_instr(vpmovzxbq))]
12171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12172pub const fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
12173 unsafe {
12174 let convert: Simd = _mm256_cvtepu8_epi64(a).as_i64x4();
12175 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12176 }
12177}
12178
12179/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12180///
12181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
12182#[inline]
12183#[target_feature(enable = "avx512f,avx512vl")]
12184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12185#[cfg_attr(test, assert_instr(vpmovzxbq))]
12186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12187pub const fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12188 unsafe {
12189 let convert: Simd = _mm_cvtepu8_epi64(a).as_i64x2();
12190 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12191 }
12192}
12193
12194/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12195///
12196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
12197#[inline]
12198#[target_feature(enable = "avx512f,avx512vl")]
12199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12200#[cfg_attr(test, assert_instr(vpmovzxbq))]
12201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12202pub const fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
12203 unsafe {
12204 let convert: Simd = _mm_cvtepu8_epi64(a).as_i64x2();
12205 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12206 }
12207}
12208
12209/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12210///
12211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
12212#[inline]
12213#[target_feature(enable = "avx512f")]
12214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12215#[cfg_attr(test, assert_instr(vpmovsxwd))]
12216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12217pub const fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
12218 unsafe {
12219 let a: Simd = a.as_i16x16();
12220 transmute::<i32x16, _>(src:simd_cast(a))
12221 }
12222}
12223
12224/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12225///
12226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
12227#[inline]
12228#[target_feature(enable = "avx512f")]
12229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12230#[cfg_attr(test, assert_instr(vpmovsxwd))]
12231#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12232pub const fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12233 unsafe {
12234 let convert: Simd = _mm512_cvtepi16_epi32(a).as_i32x16();
12235 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
12236 }
12237}
12238
12239/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12240///
12241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
12242#[inline]
12243#[target_feature(enable = "avx512f")]
12244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12245#[cfg_attr(test, assert_instr(vpmovsxwd))]
12246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12247pub const fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12248 unsafe {
12249 let convert: Simd = _mm512_cvtepi16_epi32(a).as_i32x16();
12250 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
12251 }
12252}
12253
12254/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12255///
12256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
12257#[inline]
12258#[target_feature(enable = "avx512f,avx512vl")]
12259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12260#[cfg_attr(test, assert_instr(vpmovsxwd))]
12261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12262pub const fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12263 unsafe {
12264 let convert: Simd = _mm256_cvtepi16_epi32(a).as_i32x8();
12265 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
12266 }
12267}
12268
12269/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12270///
12271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
12272#[inline]
12273#[target_feature(enable = "avx512f,avx512vl")]
12274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12275#[cfg_attr(test, assert_instr(vpmovsxwd))]
12276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12277pub const fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12278 unsafe {
12279 let convert: Simd = _mm256_cvtepi16_epi32(a).as_i32x8();
12280 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
12281 }
12282}
12283
12284/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12290#[cfg_attr(test, assert_instr(vpmovsxwd))]
12291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12292pub const fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12293 unsafe {
12294 let convert: Simd = _mm_cvtepi16_epi32(a).as_i32x4();
12295 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
12296 }
12297}
12298
12299/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12300///
12301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
12302#[inline]
12303#[target_feature(enable = "avx512f,avx512vl")]
12304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12305#[cfg_attr(test, assert_instr(vpmovsxwd))]
12306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12307pub const fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12308 unsafe {
12309 let convert: Simd = _mm_cvtepi16_epi32(a).as_i32x4();
12310 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12311 }
12312}
12313
12314/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12315///
12316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12317#[inline]
12318#[target_feature(enable = "avx512f")]
12319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12320#[cfg_attr(test, assert_instr(vpmovsxwq))]
12321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12322pub const fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12323 unsafe {
12324 let a: Simd = a.as_i16x8();
12325 transmute::<i64x8, _>(src:simd_cast(a))
12326 }
12327}
12328
12329/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12330///
12331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12332#[inline]
12333#[target_feature(enable = "avx512f")]
12334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12335#[cfg_attr(test, assert_instr(vpmovsxwq))]
12336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12337pub const fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12338 unsafe {
12339 let convert: Simd = _mm512_cvtepi16_epi64(a).as_i64x8();
12340 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12341 }
12342}
12343
12344/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12345///
12346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12347#[inline]
12348#[target_feature(enable = "avx512f")]
12349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12350#[cfg_attr(test, assert_instr(vpmovsxwq))]
12351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12352pub const fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12353 unsafe {
12354 let convert: Simd = _mm512_cvtepi16_epi64(a).as_i64x8();
12355 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12356 }
12357}
12358
12359/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12360///
12361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12362#[inline]
12363#[target_feature(enable = "avx512f,avx512vl")]
12364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12365#[cfg_attr(test, assert_instr(vpmovsxwq))]
12366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12367pub const fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12368 unsafe {
12369 let convert: Simd = _mm256_cvtepi16_epi64(a).as_i64x4();
12370 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12371 }
12372}
12373
12374/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12375///
12376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12377#[inline]
12378#[target_feature(enable = "avx512f,avx512vl")]
12379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12380#[cfg_attr(test, assert_instr(vpmovsxwq))]
12381#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12382pub const fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12383 unsafe {
12384 let convert: Simd = _mm256_cvtepi16_epi64(a).as_i64x4();
12385 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12386 }
12387}
12388
12389/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12390///
12391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12392#[inline]
12393#[target_feature(enable = "avx512f,avx512vl")]
12394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12395#[cfg_attr(test, assert_instr(vpmovsxwq))]
12396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12397pub const fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12398 unsafe {
12399 let convert: Simd = _mm_cvtepi16_epi64(a).as_i64x2();
12400 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12401 }
12402}
12403
12404/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12405///
12406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12407#[inline]
12408#[target_feature(enable = "avx512f,avx512vl")]
12409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12410#[cfg_attr(test, assert_instr(vpmovsxwq))]
12411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12412pub const fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12413 unsafe {
12414 let convert: Simd = _mm_cvtepi16_epi64(a).as_i64x2();
12415 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12416 }
12417}
12418
12419/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12420///
12421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
12422#[inline]
12423#[target_feature(enable = "avx512f")]
12424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12425#[cfg_attr(test, assert_instr(vpmovzxwd))]
12426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12427pub const fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12428 unsafe {
12429 let a: Simd = a.as_u16x16();
12430 transmute::<i32x16, _>(src:simd_cast(a))
12431 }
12432}
12433
12434/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12435///
12436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12437#[inline]
12438#[target_feature(enable = "avx512f")]
12439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12440#[cfg_attr(test, assert_instr(vpmovzxwd))]
12441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12442pub const fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12443 unsafe {
12444 let convert: Simd = _mm512_cvtepu16_epi32(a).as_i32x16();
12445 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
12446 }
12447}
12448
12449/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12450///
12451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12452#[inline]
12453#[target_feature(enable = "avx512f")]
12454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12455#[cfg_attr(test, assert_instr(vpmovzxwd))]
12456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12457pub const fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12458 unsafe {
12459 let convert: Simd = _mm512_cvtepu16_epi32(a).as_i32x16();
12460 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
12461 }
12462}
12463
12464/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12465///
12466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12467#[inline]
12468#[target_feature(enable = "avx512f,avx512vl")]
12469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12470#[cfg_attr(test, assert_instr(vpmovzxwd))]
12471#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12472pub const fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12473 unsafe {
12474 let convert: Simd = _mm256_cvtepu16_epi32(a).as_i32x8();
12475 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
12476 }
12477}
12478
12479/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12480///
12481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12482#[inline]
12483#[target_feature(enable = "avx512f,avx512vl")]
12484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12485#[cfg_attr(test, assert_instr(vpmovzxwd))]
12486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12487pub const fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12488 unsafe {
12489 let convert: Simd = _mm256_cvtepu16_epi32(a).as_i32x8();
12490 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
12491 }
12492}
12493
12494/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12497#[inline]
12498#[target_feature(enable = "avx512f,avx512vl")]
12499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12500#[cfg_attr(test, assert_instr(vpmovzxwd))]
12501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12502pub const fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12503 unsafe {
12504 let convert: Simd = _mm_cvtepu16_epi32(a).as_i32x4();
12505 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
12506 }
12507}
12508
12509/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12510///
12511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12512#[inline]
12513#[target_feature(enable = "avx512f,avx512vl")]
12514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12515#[cfg_attr(test, assert_instr(vpmovzxwd))]
12516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12517pub const fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12518 unsafe {
12519 let convert: Simd = _mm_cvtepu16_epi32(a).as_i32x4();
12520 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12521 }
12522}
12523
12524/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12525///
12526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12527#[inline]
12528#[target_feature(enable = "avx512f")]
12529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12530#[cfg_attr(test, assert_instr(vpmovzxwq))]
12531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12532pub const fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12533 unsafe {
12534 let a: Simd = a.as_u16x8();
12535 transmute::<i64x8, _>(src:simd_cast(a))
12536 }
12537}
12538
12539/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12540///
12541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12542#[inline]
12543#[target_feature(enable = "avx512f")]
12544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12545#[cfg_attr(test, assert_instr(vpmovzxwq))]
12546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12547pub const fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12548 unsafe {
12549 let convert: Simd = _mm512_cvtepu16_epi64(a).as_i64x8();
12550 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12551 }
12552}
12553
12554/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12555///
12556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12557#[inline]
12558#[target_feature(enable = "avx512f")]
12559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12560#[cfg_attr(test, assert_instr(vpmovzxwq))]
12561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12562pub const fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12563 unsafe {
12564 let convert: Simd = _mm512_cvtepu16_epi64(a).as_i64x8();
12565 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12566 }
12567}
12568
12569/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12570///
12571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12572#[inline]
12573#[target_feature(enable = "avx512f,avx512vl")]
12574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12575#[cfg_attr(test, assert_instr(vpmovzxwq))]
12576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12577pub const fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12578 unsafe {
12579 let convert: Simd = _mm256_cvtepu16_epi64(a).as_i64x4();
12580 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12581 }
12582}
12583
12584/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12585///
12586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12587#[inline]
12588#[target_feature(enable = "avx512f,avx512vl")]
12589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12590#[cfg_attr(test, assert_instr(vpmovzxwq))]
12591#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12592pub const fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12593 unsafe {
12594 let convert: Simd = _mm256_cvtepu16_epi64(a).as_i64x4();
12595 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12596 }
12597}
12598
12599/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12600///
12601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12602#[inline]
12603#[target_feature(enable = "avx512f,avx512vl")]
12604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12605#[cfg_attr(test, assert_instr(vpmovzxwq))]
12606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12607pub const fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12608 unsafe {
12609 let convert: Simd = _mm_cvtepu16_epi64(a).as_i64x2();
12610 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12611 }
12612}
12613
12614/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12615///
12616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12617#[inline]
12618#[target_feature(enable = "avx512f,avx512vl")]
12619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12620#[cfg_attr(test, assert_instr(vpmovzxwq))]
12621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12622pub const fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12623 unsafe {
12624 let convert: Simd = _mm_cvtepu16_epi64(a).as_i64x2();
12625 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12626 }
12627}
12628
12629/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12630///
12631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12632#[inline]
12633#[target_feature(enable = "avx512f")]
12634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12635#[cfg_attr(test, assert_instr(vpmovsxdq))]
12636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12637pub const fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12638 unsafe {
12639 let a: Simd = a.as_i32x8();
12640 transmute::<i64x8, _>(src:simd_cast(a))
12641 }
12642}
12643
12644/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12645///
12646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12647#[inline]
12648#[target_feature(enable = "avx512f")]
12649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12650#[cfg_attr(test, assert_instr(vpmovsxdq))]
12651#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12652pub const fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12653 unsafe {
12654 let convert: Simd = _mm512_cvtepi32_epi64(a).as_i64x8();
12655 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12656 }
12657}
12658
12659/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12660///
12661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12662#[inline]
12663#[target_feature(enable = "avx512f")]
12664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12665#[cfg_attr(test, assert_instr(vpmovsxdq))]
12666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12667pub const fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12668 unsafe {
12669 let convert: Simd = _mm512_cvtepi32_epi64(a).as_i64x8();
12670 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12671 }
12672}
12673
12674/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12675///
12676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12677#[inline]
12678#[target_feature(enable = "avx512f,avx512vl")]
12679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12680#[cfg_attr(test, assert_instr(vpmovsxdq))]
12681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12682pub const fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12683 unsafe {
12684 let convert: Simd = _mm256_cvtepi32_epi64(a).as_i64x4();
12685 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12686 }
12687}
12688
12689/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12690///
12691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12692#[inline]
12693#[target_feature(enable = "avx512f,avx512vl")]
12694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12695#[cfg_attr(test, assert_instr(vpmovsxdq))]
12696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12697pub const fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12698 unsafe {
12699 let convert: Simd = _mm256_cvtepi32_epi64(a).as_i64x4();
12700 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12701 }
12702}
12703
12704/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12707#[inline]
12708#[target_feature(enable = "avx512f,avx512vl")]
12709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12710#[cfg_attr(test, assert_instr(vpmovsxdq))]
12711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12712pub const fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12713 unsafe {
12714 let convert: Simd = _mm_cvtepi32_epi64(a).as_i64x2();
12715 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12716 }
12717}
12718
12719/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12720///
12721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12722#[inline]
12723#[target_feature(enable = "avx512f,avx512vl")]
12724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12725#[cfg_attr(test, assert_instr(vpmovsxdq))]
12726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12727pub const fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12728 unsafe {
12729 let convert: Simd = _mm_cvtepi32_epi64(a).as_i64x2();
12730 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12731 }
12732}
12733
12734/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12735///
12736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12737#[inline]
12738#[target_feature(enable = "avx512f")]
12739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12740#[cfg_attr(test, assert_instr(vpmovzxdq))]
12741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12742pub const fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12743 unsafe {
12744 let a: Simd = a.as_u32x8();
12745 transmute::<i64x8, _>(src:simd_cast(a))
12746 }
12747}
12748
12749/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12750///
12751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12752#[inline]
12753#[target_feature(enable = "avx512f")]
12754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12755#[cfg_attr(test, assert_instr(vpmovzxdq))]
12756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12757pub const fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12758 unsafe {
12759 let convert: Simd = _mm512_cvtepu32_epi64(a).as_i64x8();
12760 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12761 }
12762}
12763
12764/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12765///
12766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12767#[inline]
12768#[target_feature(enable = "avx512f")]
12769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12770#[cfg_attr(test, assert_instr(vpmovzxdq))]
12771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12772pub const fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12773 unsafe {
12774 let convert: Simd = _mm512_cvtepu32_epi64(a).as_i64x8();
12775 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12776 }
12777}
12778
12779/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12780///
12781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12782#[inline]
12783#[target_feature(enable = "avx512f,avx512vl")]
12784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12785#[cfg_attr(test, assert_instr(vpmovzxdq))]
12786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12787pub const fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12788 unsafe {
12789 let convert: Simd = _mm256_cvtepu32_epi64(a).as_i64x4();
12790 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12791 }
12792}
12793
12794/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12795///
12796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12797#[inline]
12798#[target_feature(enable = "avx512f,avx512vl")]
12799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12800#[cfg_attr(test, assert_instr(vpmovzxdq))]
12801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12802pub const fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12803 unsafe {
12804 let convert: Simd = _mm256_cvtepu32_epi64(a).as_i64x4();
12805 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12806 }
12807}
12808
12809/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12810///
12811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12812#[inline]
12813#[target_feature(enable = "avx512f,avx512vl")]
12814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12815#[cfg_attr(test, assert_instr(vpmovzxdq))]
12816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12817pub const fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12818 unsafe {
12819 let convert: Simd = _mm_cvtepu32_epi64(a).as_i64x2();
12820 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12821 }
12822}
12823
12824/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12825///
12826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12827#[inline]
12828#[target_feature(enable = "avx512f,avx512vl")]
12829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12830#[cfg_attr(test, assert_instr(vpmovzxdq))]
12831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12832pub const fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12833 unsafe {
12834 let convert: Simd = _mm_cvtepu32_epi64(a).as_i64x2();
12835 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12836 }
12837}
12838
12839/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12840///
12841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
12842#[inline]
12843#[target_feature(enable = "avx512f")]
12844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12845#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12847pub const fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12848 unsafe {
12849 let a: Simd = a.as_i32x16();
12850 transmute::<f32x16, _>(src:simd_cast(a))
12851 }
12852}
12853
12854/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12855///
12856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12857#[inline]
12858#[target_feature(enable = "avx512f")]
12859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12860#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12862pub const fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12863 unsafe {
12864 let convert: Simd = _mm512_cvtepi32_ps(a).as_f32x16();
12865 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x16()))
12866 }
12867}
12868
12869/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12870///
12871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12872#[inline]
12873#[target_feature(enable = "avx512f")]
12874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12875#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12877pub const fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12878 unsafe {
12879 let convert: Simd = _mm512_cvtepi32_ps(a).as_f32x16();
12880 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x16::ZERO))
12881 }
12882}
12883
12884/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12885///
12886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12887#[inline]
12888#[target_feature(enable = "avx512f,avx512vl")]
12889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12890#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12892pub const fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12893 unsafe {
12894 let convert: Simd = _mm256_cvtepi32_ps(a).as_f32x8();
12895 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x8()))
12896 }
12897}
12898
12899/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12900///
12901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12902#[inline]
12903#[target_feature(enable = "avx512f,avx512vl")]
12904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12905#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12907pub const fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12908 unsafe {
12909 let convert: Simd = _mm256_cvtepi32_ps(a).as_f32x8();
12910 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x8::ZERO))
12911 }
12912}
12913
12914/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12915///
12916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12917#[inline]
12918#[target_feature(enable = "avx512f,avx512vl")]
12919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12920#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12922pub const fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12923 unsafe {
12924 let convert: Simd = _mm_cvtepi32_ps(a).as_f32x4();
12925 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x4()))
12926 }
12927}
12928
12929/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12930///
12931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12932#[inline]
12933#[target_feature(enable = "avx512f,avx512vl")]
12934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12935#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12937pub const fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12938 unsafe {
12939 let convert: Simd = _mm_cvtepi32_ps(a).as_f32x4();
12940 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x4::ZERO))
12941 }
12942}
12943
12944/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12945///
12946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12947#[inline]
12948#[target_feature(enable = "avx512f")]
12949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12950#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12952pub const fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12953 unsafe {
12954 let a: Simd = a.as_i32x8();
12955 transmute::<f64x8, _>(src:simd_cast(a))
12956 }
12957}
12958
12959/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12962#[inline]
12963#[target_feature(enable = "avx512f")]
12964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12965#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12967pub const fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12968 unsafe {
12969 let convert: Simd = _mm512_cvtepi32_pd(a).as_f64x8();
12970 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
12971 }
12972}
12973
12974/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12975///
12976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12977#[inline]
12978#[target_feature(enable = "avx512f")]
12979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12980#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12982pub const fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12983 unsafe {
12984 let convert: Simd = _mm512_cvtepi32_pd(a).as_f64x8();
12985 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x8::ZERO))
12986 }
12987}
12988
12989/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12990///
12991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12992#[inline]
12993#[target_feature(enable = "avx512f,avx512vl")]
12994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12995#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12997pub const fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12998 unsafe {
12999 let convert: Simd = _mm256_cvtepi32_pd(a).as_f64x4();
13000 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x4()))
13001 }
13002}
13003
13004/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13005///
13006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
13007#[inline]
13008#[target_feature(enable = "avx512f,avx512vl")]
13009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13010#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13012pub const fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
13013 unsafe {
13014 let convert: Simd = _mm256_cvtepi32_pd(a).as_f64x4();
13015 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x4::ZERO))
13016 }
13017}
13018
13019/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13020///
13021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
13022#[inline]
13023#[target_feature(enable = "avx512f,avx512vl")]
13024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13025#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13026#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13027pub const fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13028 unsafe {
13029 let convert: Simd = _mm_cvtepi32_pd(a).as_f64x2();
13030 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x2()))
13031 }
13032}
13033
13034/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
13037#[inline]
13038#[target_feature(enable = "avx512f,avx512vl")]
13039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13040#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13042pub const fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
13043 unsafe {
13044 let convert: Simd = _mm_cvtepi32_pd(a).as_f64x2();
13045 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x2::ZERO))
13046 }
13047}
13048
13049/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
13050///
13051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
13052#[inline]
13053#[target_feature(enable = "avx512f")]
13054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13055#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13057pub const fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
13058 unsafe {
13059 let a: Simd = a.as_u32x16();
13060 transmute::<f32x16, _>(src:simd_cast(a))
13061 }
13062}
13063
13064/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13065///
13066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
13067#[inline]
13068#[target_feature(enable = "avx512f")]
13069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13070#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13071#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13072pub const fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
13073 unsafe {
13074 let convert: Simd = _mm512_cvtepu32_ps(a).as_f32x16();
13075 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x16()))
13076 }
13077}
13078
13079/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13080///
13081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
13082#[inline]
13083#[target_feature(enable = "avx512f")]
13084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13085#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13086#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13087pub const fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
13088 unsafe {
13089 let convert: Simd = _mm512_cvtepu32_ps(a).as_f32x16();
13090 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x16::ZERO))
13091 }
13092}
13093
13094/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13095///
13096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
13097#[inline]
13098#[target_feature(enable = "avx512f")]
13099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13100#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13102pub const fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
13103 unsafe {
13104 let a: Simd = a.as_u32x8();
13105 transmute::<f64x8, _>(src:simd_cast(a))
13106 }
13107}
13108
13109/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
13112#[inline]
13113#[target_feature(enable = "avx512f")]
13114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13115#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13117pub const fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
13118 unsafe {
13119 let convert: Simd = _mm512_cvtepu32_pd(a).as_f64x8();
13120 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
13121 }
13122}
13123
13124/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13125///
13126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
13127#[inline]
13128#[target_feature(enable = "avx512f")]
13129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13130#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13131#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13132pub const fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
13133 unsafe {
13134 let convert: Simd = _mm512_cvtepu32_pd(a).as_f64x8();
13135 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x8::ZERO))
13136 }
13137}
13138
13139/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13140///
13141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
13142#[inline]
13143#[target_feature(enable = "avx512f,avx512vl")]
13144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13145#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13147pub const fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
13148 unsafe {
13149 let a: Simd = a.as_u32x4();
13150 transmute::<f64x4, _>(src:simd_cast(a))
13151 }
13152}
13153
13154/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13155///
13156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
13157#[inline]
13158#[target_feature(enable = "avx512f,avx512vl")]
13159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13160#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13162pub const fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
13163 unsafe {
13164 let convert: Simd = _mm256_cvtepu32_pd(a).as_f64x4();
13165 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x4()))
13166 }
13167}
13168
13169/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13170///
13171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
13172#[inline]
13173#[target_feature(enable = "avx512f,avx512vl")]
13174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13175#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13177pub const fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
13178 unsafe {
13179 let convert: Simd = _mm256_cvtepu32_pd(a).as_f64x4();
13180 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x4::ZERO))
13181 }
13182}
13183
13184/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13190#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13192pub const fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
13193 unsafe {
13194 let a: Simd = a.as_u32x4();
13195 let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
13196 transmute::<f64x2, _>(src:simd_cast(u64))
13197 }
13198}
13199
13200/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13201///
13202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
13203#[inline]
13204#[target_feature(enable = "avx512f,avx512vl")]
13205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13206#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13208pub const fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13209 unsafe {
13210 let convert: Simd = _mm_cvtepu32_pd(a).as_f64x2();
13211 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x2()))
13212 }
13213}
13214
13215/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13216///
13217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
13218#[inline]
13219#[target_feature(enable = "avx512f,avx512vl")]
13220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13221#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13223pub const fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
13224 unsafe {
13225 let convert: Simd = _mm_cvtepu32_pd(a).as_f64x2();
13226 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x2::ZERO))
13227 }
13228}
13229
13230/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13231///
13232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
13233#[inline]
13234#[target_feature(enable = "avx512f")]
13235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13236#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13238pub const fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
13239 unsafe {
13240 let v2: Simd = v2.as_i32x16();
13241 let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13242 transmute::<f64x8, _>(src:simd_cast(v256))
13243 }
13244}
13245
13246/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13247///
13248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
13249#[inline]
13250#[target_feature(enable = "avx512f")]
13251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13252#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13254pub const fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13255 unsafe {
13256 let convert: Simd = _mm512_cvtepi32lo_pd(v2).as_f64x8();
13257 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
13258 }
13259}
13260
13261/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13262///
13263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
13264#[inline]
13265#[target_feature(enable = "avx512f")]
13266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13267#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13269pub const fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
13270 unsafe {
13271 let v2: Simd = v2.as_u32x16();
13272 let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13273 transmute::<f64x8, _>(src:simd_cast(v256))
13274 }
13275}
13276
13277/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13278///
13279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
13280#[inline]
13281#[target_feature(enable = "avx512f")]
13282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13283#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13285pub const fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13286 unsafe {
13287 let convert: Simd = _mm512_cvtepu32lo_pd(v2).as_f64x8();
13288 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
13289 }
13290}
13291
13292/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13293///
13294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
13295#[inline]
13296#[target_feature(enable = "avx512f")]
13297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13298#[cfg_attr(test, assert_instr(vpmovdw))]
13299#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13300pub const fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
13301 unsafe {
13302 let a: Simd = a.as_i32x16();
13303 transmute::<i16x16, _>(src:simd_cast(a))
13304 }
13305}
13306
13307/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13308///
13309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
13310#[inline]
13311#[target_feature(enable = "avx512f")]
13312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13313#[cfg_attr(test, assert_instr(vpmovdw))]
13314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13315pub const fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13316 unsafe {
13317 let convert: Simd = _mm512_cvtepi32_epi16(a).as_i16x16();
13318 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x16()))
13319 }
13320}
13321
13322/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13323///
13324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
13325#[inline]
13326#[target_feature(enable = "avx512f")]
13327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13328#[cfg_attr(test, assert_instr(vpmovdw))]
13329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13330pub const fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13331 unsafe {
13332 let convert: Simd = _mm512_cvtepi32_epi16(a).as_i16x16();
13333 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x16::ZERO))
13334 }
13335}
13336
13337/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13338///
13339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
13340#[inline]
13341#[target_feature(enable = "avx512f,avx512vl")]
13342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13343#[cfg_attr(test, assert_instr(vpmovdw))]
13344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13345pub const fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
13346 unsafe {
13347 let a: Simd = a.as_i32x8();
13348 transmute::<i16x8, _>(src:simd_cast(a))
13349 }
13350}
13351
13352/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13353///
13354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
13355#[inline]
13356#[target_feature(enable = "avx512f,avx512vl")]
13357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13358#[cfg_attr(test, assert_instr(vpmovdw))]
13359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13360pub const fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13361 unsafe {
13362 let convert: Simd = _mm256_cvtepi32_epi16(a).as_i16x8();
13363 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x8()))
13364 }
13365}
13366
13367/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
13370#[inline]
13371#[target_feature(enable = "avx512f,avx512vl")]
13372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13373#[cfg_attr(test, assert_instr(vpmovdw))]
13374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13375pub const fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13376 unsafe {
13377 let convert: Simd = _mm256_cvtepi32_epi16(a).as_i16x8();
13378 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x8::ZERO))
13379 }
13380}
13381
13382/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13383///
13384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13385#[inline]
13386#[target_feature(enable = "avx512f,avx512vl")]
13387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13388#[cfg_attr(test, assert_instr(vpmovdw))]
13389pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13390 unsafe { transmute(src:vpmovdw128(a.as_i32x4(), src:i16x8::ZERO, mask:0b11111111)) }
13391}
13392
13393/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13394///
13395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13396#[inline]
13397#[target_feature(enable = "avx512f,avx512vl")]
13398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13399#[cfg_attr(test, assert_instr(vpmovdw))]
13400pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13401 unsafe { transmute(src:vpmovdw128(a.as_i32x4(), src.as_i16x8(), mask:k)) }
13402}
13403
13404/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13405///
13406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13407#[inline]
13408#[target_feature(enable = "avx512f,avx512vl")]
13409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13410#[cfg_attr(test, assert_instr(vpmovdw))]
13411pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13412 unsafe { transmute(src:vpmovdw128(a.as_i32x4(), src:i16x8::ZERO, mask:k)) }
13413}
13414
13415/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13416///
13417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13418#[inline]
13419#[target_feature(enable = "avx512f")]
13420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13421#[cfg_attr(test, assert_instr(vpmovdb))]
13422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13423pub const fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13424 unsafe {
13425 let a: Simd = a.as_i32x16();
13426 transmute::<i8x16, _>(src:simd_cast(a))
13427 }
13428}
13429
13430/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13431///
13432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13433#[inline]
13434#[target_feature(enable = "avx512f")]
13435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13436#[cfg_attr(test, assert_instr(vpmovdb))]
13437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13438pub const fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13439 unsafe {
13440 let convert: Simd = _mm512_cvtepi32_epi8(a).as_i8x16();
13441 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i8x16()))
13442 }
13443}
13444
13445/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13446///
13447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13448#[inline]
13449#[target_feature(enable = "avx512f")]
13450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13451#[cfg_attr(test, assert_instr(vpmovdb))]
13452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13453pub const fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13454 unsafe {
13455 let convert: Simd = _mm512_cvtepi32_epi8(a).as_i8x16();
13456 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i8x16::ZERO))
13457 }
13458}
13459
13460/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13461///
13462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13463#[inline]
13464#[target_feature(enable = "avx512f,avx512vl")]
13465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13466#[cfg_attr(test, assert_instr(vpmovdb))]
13467pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13468 unsafe { transmute(src:vpmovdb256(a.as_i32x8(), src:i8x16::ZERO, mask:0b11111111)) }
13469}
13470
13471/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13472///
13473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13474#[inline]
13475#[target_feature(enable = "avx512f,avx512vl")]
13476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13477#[cfg_attr(test, assert_instr(vpmovdb))]
13478pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13479 unsafe { transmute(src:vpmovdb256(a.as_i32x8(), src.as_i8x16(), mask:k)) }
13480}
13481
13482/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13483///
13484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13485#[inline]
13486#[target_feature(enable = "avx512f,avx512vl")]
13487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13488#[cfg_attr(test, assert_instr(vpmovdb))]
13489pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13490 unsafe { transmute(src:vpmovdb256(a.as_i32x8(), src:i8x16::ZERO, mask:k)) }
13491}
13492
13493/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13494///
13495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13496#[inline]
13497#[target_feature(enable = "avx512f,avx512vl")]
13498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13499#[cfg_attr(test, assert_instr(vpmovdb))]
13500pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13501 unsafe { transmute(src:vpmovdb128(a.as_i32x4(), src:i8x16::ZERO, mask:0b11111111)) }
13502}
13503
13504/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13505///
13506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13507#[inline]
13508#[target_feature(enable = "avx512f,avx512vl")]
13509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13510#[cfg_attr(test, assert_instr(vpmovdb))]
13511pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13512 unsafe { transmute(src:vpmovdb128(a.as_i32x4(), src.as_i8x16(), mask:k)) }
13513}
13514
13515/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13516///
13517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13518#[inline]
13519#[target_feature(enable = "avx512f,avx512vl")]
13520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13521#[cfg_attr(test, assert_instr(vpmovdb))]
13522pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13523 unsafe { transmute(src:vpmovdb128(a.as_i32x4(), src:i8x16::ZERO, mask:k)) }
13524}
13525
13526/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13527///
13528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13529#[inline]
13530#[target_feature(enable = "avx512f")]
13531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13532#[cfg_attr(test, assert_instr(vpmovqd))]
13533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13534pub const fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13535 unsafe {
13536 let a: Simd = a.as_i64x8();
13537 transmute::<i32x8, _>(src:simd_cast(a))
13538 }
13539}
13540
13541/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13542///
13543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13544#[inline]
13545#[target_feature(enable = "avx512f")]
13546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13547#[cfg_attr(test, assert_instr(vpmovqd))]
13548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13549pub const fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13550 unsafe {
13551 let convert: Simd = _mm512_cvtepi64_epi32(a).as_i32x8();
13552 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
13553 }
13554}
13555
13556/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13557///
13558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13559#[inline]
13560#[target_feature(enable = "avx512f")]
13561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13562#[cfg_attr(test, assert_instr(vpmovqd))]
13563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13564pub const fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13565 unsafe {
13566 let convert: Simd = _mm512_cvtepi64_epi32(a).as_i32x8();
13567 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
13568 }
13569}
13570
13571/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13572///
13573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13574#[inline]
13575#[target_feature(enable = "avx512f,avx512vl")]
13576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13577#[cfg_attr(test, assert_instr(vpmovqd))]
13578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13579pub const fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13580 unsafe {
13581 let a: Simd = a.as_i64x4();
13582 transmute::<i32x4, _>(src:simd_cast(a))
13583 }
13584}
13585
13586/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13587///
13588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13589#[inline]
13590#[target_feature(enable = "avx512f,avx512vl")]
13591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13592#[cfg_attr(test, assert_instr(vpmovqd))]
13593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13594pub const fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13595 unsafe {
13596 let convert: Simd = _mm256_cvtepi64_epi32(a).as_i32x4();
13597 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
13598 }
13599}
13600
13601/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13602///
13603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13604#[inline]
13605#[target_feature(enable = "avx512f,avx512vl")]
13606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13607#[cfg_attr(test, assert_instr(vpmovqd))]
13608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13609pub const fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13610 unsafe {
13611 let convert: Simd = _mm256_cvtepi64_epi32(a).as_i32x4();
13612 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
13613 }
13614}
13615
13616/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13617///
13618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13619#[inline]
13620#[target_feature(enable = "avx512f,avx512vl")]
13621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13622#[cfg_attr(test, assert_instr(vpmovqd))]
13623pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13624 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
13625}
13626
13627/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13628///
13629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13630#[inline]
13631#[target_feature(enable = "avx512f,avx512vl")]
13632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13633#[cfg_attr(test, assert_instr(vpmovqd))]
13634pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13635 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
13636}
13637
13638/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13639///
13640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13641#[inline]
13642#[target_feature(enable = "avx512f,avx512vl")]
13643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13644#[cfg_attr(test, assert_instr(vpmovqd))]
13645pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13646 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
13647}
13648
13649/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13650///
13651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13652#[inline]
13653#[target_feature(enable = "avx512f")]
13654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13655#[cfg_attr(test, assert_instr(vpmovqw))]
13656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13657pub const fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13658 unsafe {
13659 let a: Simd = a.as_i64x8();
13660 transmute::<i16x8, _>(src:simd_cast(a))
13661 }
13662}
13663
13664/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13670#[cfg_attr(test, assert_instr(vpmovqw))]
13671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13672pub const fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13673 unsafe {
13674 let convert: Simd = _mm512_cvtepi64_epi16(a).as_i16x8();
13675 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x8()))
13676 }
13677}
13678
13679/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13680///
13681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13682#[inline]
13683#[target_feature(enable = "avx512f")]
13684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13685#[cfg_attr(test, assert_instr(vpmovqw))]
13686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13687pub const fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13688 unsafe {
13689 let convert: Simd = _mm512_cvtepi64_epi16(a).as_i16x8();
13690 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x8::ZERO))
13691 }
13692}
13693
13694/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13695///
13696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13697#[inline]
13698#[target_feature(enable = "avx512f,avx512vl")]
13699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13700#[cfg_attr(test, assert_instr(vpmovqw))]
13701pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13702 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
13703}
13704
13705/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13706///
13707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13708#[inline]
13709#[target_feature(enable = "avx512f,avx512vl")]
13710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13711#[cfg_attr(test, assert_instr(vpmovqw))]
13712pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13713 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
13714}
13715
13716/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13717///
13718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13719#[inline]
13720#[target_feature(enable = "avx512f,avx512vl")]
13721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13722#[cfg_attr(test, assert_instr(vpmovqw))]
13723pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13724 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
13725}
13726
13727/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13728///
13729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13730#[inline]
13731#[target_feature(enable = "avx512f,avx512vl")]
13732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13733#[cfg_attr(test, assert_instr(vpmovqw))]
13734pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13735 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
13736}
13737
13738/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13739///
13740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13741#[inline]
13742#[target_feature(enable = "avx512f,avx512vl")]
13743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13744#[cfg_attr(test, assert_instr(vpmovqw))]
13745pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13746 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
13747}
13748
13749/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13750///
13751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13752#[inline]
13753#[target_feature(enable = "avx512f,avx512vl")]
13754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13755#[cfg_attr(test, assert_instr(vpmovqw))]
13756pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13757 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
13758}
13759
13760/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13761///
13762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13763#[inline]
13764#[target_feature(enable = "avx512f")]
13765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13766#[cfg_attr(test, assert_instr(vpmovqb))]
13767pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13768 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
13769}
13770
13771/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13772///
13773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13774#[inline]
13775#[target_feature(enable = "avx512f")]
13776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13777#[cfg_attr(test, assert_instr(vpmovqb))]
13778pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13779 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
13780}
13781
13782/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13783///
13784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13785#[inline]
13786#[target_feature(enable = "avx512f")]
13787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13788#[cfg_attr(test, assert_instr(vpmovqb))]
13789pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13790 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
13791}
13792
13793/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13794///
13795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13796#[inline]
13797#[target_feature(enable = "avx512f,avx512vl")]
13798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13799#[cfg_attr(test, assert_instr(vpmovqb))]
13800pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13801 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
13802}
13803
13804/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13805///
13806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13807#[inline]
13808#[target_feature(enable = "avx512f,avx512vl")]
13809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13810#[cfg_attr(test, assert_instr(vpmovqb))]
13811pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13812 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
13813}
13814
13815/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13816///
13817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13818#[inline]
13819#[target_feature(enable = "avx512f,avx512vl")]
13820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13821#[cfg_attr(test, assert_instr(vpmovqb))]
13822pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13823 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
13824}
13825
13826/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13827///
13828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13829#[inline]
13830#[target_feature(enable = "avx512f,avx512vl")]
13831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13832#[cfg_attr(test, assert_instr(vpmovqb))]
13833pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13834 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
13835}
13836
13837/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13838///
13839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13840#[inline]
13841#[target_feature(enable = "avx512f,avx512vl")]
13842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13843#[cfg_attr(test, assert_instr(vpmovqb))]
13844pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13845 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
13846}
13847
13848/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13849///
13850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13851#[inline]
13852#[target_feature(enable = "avx512f,avx512vl")]
13853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13854#[cfg_attr(test, assert_instr(vpmovqb))]
13855pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13856 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
13857}
13858
13859/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13860///
13861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13862#[inline]
13863#[target_feature(enable = "avx512f")]
13864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13865#[cfg_attr(test, assert_instr(vpmovsdw))]
13866pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13867 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:0b11111111_11111111)) }
13868}
13869
13870/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13871///
13872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13873#[inline]
13874#[target_feature(enable = "avx512f")]
13875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13876#[cfg_attr(test, assert_instr(vpmovsdw))]
13877pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13878 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src.as_i16x16(), mask:k)) }
13879}
13880
13881/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13882///
13883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
13884#[inline]
13885#[target_feature(enable = "avx512f")]
13886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13887#[cfg_attr(test, assert_instr(vpmovsdw))]
13888pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13889 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:k)) }
13890}
13891
13892/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13893///
13894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13895#[inline]
13896#[target_feature(enable = "avx512f,avx512vl")]
13897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13898#[cfg_attr(test, assert_instr(vpmovsdw))]
13899pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13900 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:0b11111111)) }
13901}
13902
13903/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13904///
13905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13906#[inline]
13907#[target_feature(enable = "avx512f,avx512vl")]
13908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13909#[cfg_attr(test, assert_instr(vpmovsdw))]
13910pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13911 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src.as_i16x8(), mask:k)) }
13912}
13913
13914/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13915///
13916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13917#[inline]
13918#[target_feature(enable = "avx512f,avx512vl")]
13919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13920#[cfg_attr(test, assert_instr(vpmovsdw))]
13921pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13922 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:k)) }
13923}
13924
13925/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13926///
13927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13928#[inline]
13929#[target_feature(enable = "avx512f,avx512vl")]
13930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13931#[cfg_attr(test, assert_instr(vpmovsdw))]
13932pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13933 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:0b11111111)) }
13934}
13935
13936/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13937///
13938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13939#[inline]
13940#[target_feature(enable = "avx512f,avx512vl")]
13941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13942#[cfg_attr(test, assert_instr(vpmovsdw))]
13943pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13944 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src.as_i16x8(), mask:k)) }
13945}
13946
13947/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13948///
13949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13950#[inline]
13951#[target_feature(enable = "avx512f,avx512vl")]
13952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13953#[cfg_attr(test, assert_instr(vpmovsdw))]
13954pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13955 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:k)) }
13956}
13957
13958/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13959///
13960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13961#[inline]
13962#[target_feature(enable = "avx512f")]
13963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13964#[cfg_attr(test, assert_instr(vpmovsdb))]
13965pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13966 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:0b11111111_11111111)) }
13967}
13968
13969/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13970///
13971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13972#[inline]
13973#[target_feature(enable = "avx512f")]
13974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13975#[cfg_attr(test, assert_instr(vpmovsdb))]
13976pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13977 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src.as_i8x16(), mask:k)) }
13978}
13979
13980/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13981///
13982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13983#[inline]
13984#[target_feature(enable = "avx512f")]
13985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13986#[cfg_attr(test, assert_instr(vpmovsdb))]
13987pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13988 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:k)) }
13989}
13990
13991/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13992///
13993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13994#[inline]
13995#[target_feature(enable = "avx512f,avx512vl")]
13996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13997#[cfg_attr(test, assert_instr(vpmovsdb))]
13998pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13999 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:0b11111111)) }
14000}
14001
14002/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14003///
14004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
14005#[inline]
14006#[target_feature(enable = "avx512f,avx512vl")]
14007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14008#[cfg_attr(test, assert_instr(vpmovsdb))]
14009pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14010 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src.as_i8x16(), mask:k)) }
14011}
14012
14013/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14014///
14015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
14016#[inline]
14017#[target_feature(enable = "avx512f,avx512vl")]
14018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14019#[cfg_attr(test, assert_instr(vpmovsdb))]
14020pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14021 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:k)) }
14022}
14023
14024/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14025///
14026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
14027#[inline]
14028#[target_feature(enable = "avx512f,avx512vl")]
14029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14030#[cfg_attr(test, assert_instr(vpmovsdb))]
14031pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
14032 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:0b11111111)) }
14033}
14034
14035/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14036///
14037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
14038#[inline]
14039#[target_feature(enable = "avx512f,avx512vl")]
14040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14041#[cfg_attr(test, assert_instr(vpmovsdb))]
14042pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14043 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src.as_i8x16(), mask:k)) }
14044}
14045
14046/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14047///
14048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
14049#[inline]
14050#[target_feature(enable = "avx512f,avx512vl")]
14051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14052#[cfg_attr(test, assert_instr(vpmovsdb))]
14053pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14054 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:k)) }
14055}
14056
14057/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14058///
14059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
14060#[inline]
14061#[target_feature(enable = "avx512f")]
14062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14063#[cfg_attr(test, assert_instr(vpmovsqd))]
14064pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
14065 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:0b11111111)) }
14066}
14067
14068/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14069///
14070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
14071#[inline]
14072#[target_feature(enable = "avx512f")]
14073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14074#[cfg_attr(test, assert_instr(vpmovsqd))]
14075pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14076 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src.as_i32x8(), mask:k)) }
14077}
14078
14079/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14080///
14081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
14082#[inline]
14083#[target_feature(enable = "avx512f")]
14084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14085#[cfg_attr(test, assert_instr(vpmovsqd))]
14086pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14087 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:k)) }
14088}
14089
14090/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14091///
14092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
14093#[inline]
14094#[target_feature(enable = "avx512f,avx512vl")]
14095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14096#[cfg_attr(test, assert_instr(vpmovsqd))]
14097pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
14098 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:0b11111111)) }
14099}
14100
14101/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14102///
14103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
14104#[inline]
14105#[target_feature(enable = "avx512f,avx512vl")]
14106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14107#[cfg_attr(test, assert_instr(vpmovsqd))]
14108pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14109 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src.as_i32x4(), mask:k)) }
14110}
14111
14112/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14113///
14114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
14115#[inline]
14116#[target_feature(enable = "avx512f,avx512vl")]
14117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14118#[cfg_attr(test, assert_instr(vpmovsqd))]
14119pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14120 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:k)) }
14121}
14122
14123/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14124///
14125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
14126#[inline]
14127#[target_feature(enable = "avx512f,avx512vl")]
14128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14129#[cfg_attr(test, assert_instr(vpmovsqd))]
14130pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
14131 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
14132}
14133
14134/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14135///
14136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
14137#[inline]
14138#[target_feature(enable = "avx512f,avx512vl")]
14139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14140#[cfg_attr(test, assert_instr(vpmovsqd))]
14141pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14142 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
14143}
14144
14145/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14146///
14147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
14148#[inline]
14149#[target_feature(enable = "avx512f,avx512vl")]
14150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14151#[cfg_attr(test, assert_instr(vpmovsqd))]
14152pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14153 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
14154}
14155
14156/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14157///
14158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
14159#[inline]
14160#[target_feature(enable = "avx512f")]
14161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14162#[cfg_attr(test, assert_instr(vpmovsqw))]
14163pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
14164 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:0b11111111)) }
14165}
14166
14167/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14168///
14169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
14170#[inline]
14171#[target_feature(enable = "avx512f")]
14172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14173#[cfg_attr(test, assert_instr(vpmovsqw))]
14174pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14175 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src.as_i16x8(), mask:k)) }
14176}
14177
14178/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14179///
14180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
14181#[inline]
14182#[target_feature(enable = "avx512f")]
14183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14184#[cfg_attr(test, assert_instr(vpmovsqw))]
14185pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14186 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:k)) }
14187}
14188
14189/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14190///
14191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
14192#[inline]
14193#[target_feature(enable = "avx512f,avx512vl")]
14194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14195#[cfg_attr(test, assert_instr(vpmovsqw))]
14196pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
14197 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
14198}
14199
14200/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14201///
14202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
14203#[inline]
14204#[target_feature(enable = "avx512f,avx512vl")]
14205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14206#[cfg_attr(test, assert_instr(vpmovsqw))]
14207pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14208 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
14209}
14210
14211/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14212///
14213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
14214#[inline]
14215#[target_feature(enable = "avx512f,avx512vl")]
14216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14217#[cfg_attr(test, assert_instr(vpmovsqw))]
14218pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14219 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
14220}
14221
14222/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14223///
14224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
14225#[inline]
14226#[target_feature(enable = "avx512f,avx512vl")]
14227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14228#[cfg_attr(test, assert_instr(vpmovsqw))]
14229pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
14230 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
14231}
14232
14233/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14234///
14235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
14236#[inline]
14237#[target_feature(enable = "avx512f,avx512vl")]
14238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14239#[cfg_attr(test, assert_instr(vpmovsqw))]
14240pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14241 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
14242}
14243
14244/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14245///
14246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
14247#[inline]
14248#[target_feature(enable = "avx512f,avx512vl")]
14249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14250#[cfg_attr(test, assert_instr(vpmovsqw))]
14251pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14252 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
14253}
14254
14255/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14256///
14257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
14258#[inline]
14259#[target_feature(enable = "avx512f")]
14260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14261#[cfg_attr(test, assert_instr(vpmovsqb))]
14262pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
14263 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
14264}
14265
14266/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14267///
14268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
14269#[inline]
14270#[target_feature(enable = "avx512f")]
14271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14272#[cfg_attr(test, assert_instr(vpmovsqb))]
14273pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14274 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
14275}
14276
14277/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14278///
14279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
14280#[inline]
14281#[target_feature(enable = "avx512f")]
14282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14283#[cfg_attr(test, assert_instr(vpmovsqb))]
14284pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14285 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
14286}
14287
14288/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14289///
14290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
14291#[inline]
14292#[target_feature(enable = "avx512f,avx512vl")]
14293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14294#[cfg_attr(test, assert_instr(vpmovsqb))]
14295pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
14296 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
14297}
14298
14299/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14300///
14301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
14302#[inline]
14303#[target_feature(enable = "avx512f,avx512vl")]
14304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14305#[cfg_attr(test, assert_instr(vpmovsqb))]
14306pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14307 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
14308}
14309
14310/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14311///
14312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
14313#[inline]
14314#[target_feature(enable = "avx512f,avx512vl")]
14315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14316#[cfg_attr(test, assert_instr(vpmovsqb))]
14317pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14318 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
14319}
14320
14321/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14322///
14323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
14324#[inline]
14325#[target_feature(enable = "avx512f,avx512vl")]
14326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14327#[cfg_attr(test, assert_instr(vpmovsqb))]
14328pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
14329 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
14330}
14331
14332/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14333///
14334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
14335#[inline]
14336#[target_feature(enable = "avx512f,avx512vl")]
14337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14338#[cfg_attr(test, assert_instr(vpmovsqb))]
14339pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14340 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
14341}
14342
14343/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14344///
14345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
14346#[inline]
14347#[target_feature(enable = "avx512f,avx512vl")]
14348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14349#[cfg_attr(test, assert_instr(vpmovsqb))]
14350pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14351 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
14352}
14353
14354/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14355///
14356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
14357#[inline]
14358#[target_feature(enable = "avx512f")]
14359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14360#[cfg_attr(test, assert_instr(vpmovusdw))]
14361pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
14362 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:0b11111111_11111111)) }
14363}
14364
14365/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14366///
14367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
14368#[inline]
14369#[target_feature(enable = "avx512f")]
14370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14371#[cfg_attr(test, assert_instr(vpmovusdw))]
14372pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
14373 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src.as_u16x16(), mask:k)) }
14374}
14375
14376/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14377///
14378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
14379#[inline]
14380#[target_feature(enable = "avx512f")]
14381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14382#[cfg_attr(test, assert_instr(vpmovusdw))]
14383pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
14384 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:k)) }
14385}
14386
14387/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14388///
14389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
14390#[inline]
14391#[target_feature(enable = "avx512f,avx512vl")]
14392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14393#[cfg_attr(test, assert_instr(vpmovusdw))]
14394pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14395 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src:u16x8::ZERO, mask:0b11111111)) }
14396}
14397
14398/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14399///
14400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14401#[inline]
14402#[target_feature(enable = "avx512f,avx512vl")]
14403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14404#[cfg_attr(test, assert_instr(vpmovusdw))]
14405pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14406 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src.as_u16x8(), mask:k)) }
14407}
14408
14409/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14410///
14411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14412#[inline]
14413#[target_feature(enable = "avx512f,avx512vl")]
14414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14415#[cfg_attr(test, assert_instr(vpmovusdw))]
14416pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14417 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src:u16x8::ZERO, mask:k)) }
14418}
14419
14420/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14421///
14422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14423#[inline]
14424#[target_feature(enable = "avx512f,avx512vl")]
14425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14426#[cfg_attr(test, assert_instr(vpmovusdw))]
14427pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14428 unsafe { transmute(src:vpmovusdw128(a.as_u32x4(), src:u16x8::ZERO, mask:0b11111111)) }
14429}
14430
14431/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14432///
14433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14434#[inline]
14435#[target_feature(enable = "avx512f,avx512vl")]
14436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14437#[cfg_attr(test, assert_instr(vpmovusdw))]
14438pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14439 unsafe { transmute(src:vpmovusdw128(a.as_u32x4(), src.as_u16x8(), mask:k)) }
14440}
14441
14442/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14443///
14444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14445#[inline]
14446#[target_feature(enable = "avx512f,avx512vl")]
14447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14448#[cfg_attr(test, assert_instr(vpmovusdw))]
14449pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14450 unsafe { transmute(src:vpmovusdw128(a.as_u32x4(), src:u16x8::ZERO, mask:k)) }
14451}
14452
14453/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14454///
14455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14456#[inline]
14457#[target_feature(enable = "avx512f")]
14458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14459#[cfg_attr(test, assert_instr(vpmovusdb))]
14460pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14461 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src:u8x16::ZERO, mask:0b11111111_11111111)) }
14462}
14463
14464/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14465///
14466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14467#[inline]
14468#[target_feature(enable = "avx512f")]
14469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14470#[cfg_attr(test, assert_instr(vpmovusdb))]
14471pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14472 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src.as_u8x16(), mask:k)) }
14473}
14474
14475/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14476///
14477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14478#[inline]
14479#[target_feature(enable = "avx512f")]
14480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14481#[cfg_attr(test, assert_instr(vpmovusdb))]
14482pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14483 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src:u8x16::ZERO, mask:k)) }
14484}
14485
14486/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14487///
14488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14489#[inline]
14490#[target_feature(enable = "avx512f,avx512vl")]
14491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14492#[cfg_attr(test, assert_instr(vpmovusdb))]
14493pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14494 unsafe { transmute(src:vpmovusdb256(a.as_u32x8(), src:u8x16::ZERO, mask:0b11111111)) }
14495}
14496
14497/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14498///
14499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14500#[inline]
14501#[target_feature(enable = "avx512f,avx512vl")]
14502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14503#[cfg_attr(test, assert_instr(vpmovusdb))]
14504pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14505 unsafe { transmute(src:vpmovusdb256(a.as_u32x8(), src.as_u8x16(), mask:k)) }
14506}
14507
14508/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14509///
14510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14511#[inline]
14512#[target_feature(enable = "avx512f,avx512vl")]
14513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14514#[cfg_attr(test, assert_instr(vpmovusdb))]
14515pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14516 unsafe { transmute(src:vpmovusdb256(a.as_u32x8(), src:u8x16::ZERO, mask:k)) }
14517}
14518
14519/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14520///
14521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14522#[inline]
14523#[target_feature(enable = "avx512f,avx512vl")]
14524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14525#[cfg_attr(test, assert_instr(vpmovusdb))]
14526pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14527 unsafe { transmute(src:vpmovusdb128(a.as_u32x4(), src:u8x16::ZERO, mask:0b11111111)) }
14528}
14529
14530/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14531///
14532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14533#[inline]
14534#[target_feature(enable = "avx512f,avx512vl")]
14535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14536#[cfg_attr(test, assert_instr(vpmovusdb))]
14537pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14538 unsafe { transmute(src:vpmovusdb128(a.as_u32x4(), src.as_u8x16(), mask:k)) }
14539}
14540
14541/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14542///
14543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14544#[inline]
14545#[target_feature(enable = "avx512f,avx512vl")]
14546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14547#[cfg_attr(test, assert_instr(vpmovusdb))]
14548pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14549 unsafe { transmute(src:vpmovusdb128(a.as_u32x4(), src:u8x16::ZERO, mask:k)) }
14550}
14551
14552/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14553///
14554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14555#[inline]
14556#[target_feature(enable = "avx512f")]
14557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14558#[cfg_attr(test, assert_instr(vpmovusqd))]
14559pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14560 unsafe { transmute(src:vpmovusqd(a.as_u64x8(), src:u32x8::ZERO, mask:0b11111111)) }
14561}
14562
14563/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14564///
14565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14566#[inline]
14567#[target_feature(enable = "avx512f")]
14568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14569#[cfg_attr(test, assert_instr(vpmovusqd))]
14570pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14571 unsafe { transmute(src:vpmovusqd(a.as_u64x8(), src.as_u32x8(), mask:k)) }
14572}
14573
14574/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14575///
14576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14577#[inline]
14578#[target_feature(enable = "avx512f")]
14579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14580#[cfg_attr(test, assert_instr(vpmovusqd))]
14581pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14582 unsafe { transmute(src:vpmovusqd(a.as_u64x8(), src:u32x8::ZERO, mask:k)) }
14583}
14584
14585/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14586///
14587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14588#[inline]
14589#[target_feature(enable = "avx512f,avx512vl")]
14590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14591#[cfg_attr(test, assert_instr(vpmovusqd))]
14592pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14593 unsafe { transmute(src:vpmovusqd256(a.as_u64x4(), src:u32x4::ZERO, mask:0b11111111)) }
14594}
14595
14596/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14597///
14598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14599#[inline]
14600#[target_feature(enable = "avx512f,avx512vl")]
14601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14602#[cfg_attr(test, assert_instr(vpmovusqd))]
14603pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14604 unsafe { transmute(src:vpmovusqd256(a.as_u64x4(), src.as_u32x4(), mask:k)) }
14605}
14606
14607/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14608///
14609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14610#[inline]
14611#[target_feature(enable = "avx512f,avx512vl")]
14612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14613#[cfg_attr(test, assert_instr(vpmovusqd))]
14614pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14615 unsafe { transmute(src:vpmovusqd256(a.as_u64x4(), src:u32x4::ZERO, mask:k)) }
14616}
14617
14618/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14619///
14620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14621#[inline]
14622#[target_feature(enable = "avx512f,avx512vl")]
14623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14624#[cfg_attr(test, assert_instr(vpmovusqd))]
14625pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14626 unsafe { transmute(src:vpmovusqd128(a.as_u64x2(), src:u32x4::ZERO, mask:0b11111111)) }
14627}
14628
14629/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14632#[inline]
14633#[target_feature(enable = "avx512f,avx512vl")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vpmovusqd))]
14636pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14637 unsafe { transmute(src:vpmovusqd128(a.as_u64x2(), src.as_u32x4(), mask:k)) }
14638}
14639
14640/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14641///
14642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14643#[inline]
14644#[target_feature(enable = "avx512f,avx512vl")]
14645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14646#[cfg_attr(test, assert_instr(vpmovusqd))]
14647pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14648 unsafe { transmute(src:vpmovusqd128(a.as_u64x2(), src:u32x4::ZERO, mask:k)) }
14649}
14650
14651/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14652///
14653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14654#[inline]
14655#[target_feature(enable = "avx512f")]
14656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14657#[cfg_attr(test, assert_instr(vpmovusqw))]
14658pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14659 unsafe { transmute(src:vpmovusqw(a.as_u64x8(), src:u16x8::ZERO, mask:0b11111111)) }
14660}
14661
14662/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14663///
14664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14665#[inline]
14666#[target_feature(enable = "avx512f")]
14667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14668#[cfg_attr(test, assert_instr(vpmovusqw))]
14669pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14670 unsafe { transmute(src:vpmovusqw(a.as_u64x8(), src.as_u16x8(), mask:k)) }
14671}
14672
14673/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14674///
14675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14676#[inline]
14677#[target_feature(enable = "avx512f")]
14678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14679#[cfg_attr(test, assert_instr(vpmovusqw))]
14680pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14681 unsafe { transmute(src:vpmovusqw(a.as_u64x8(), src:u16x8::ZERO, mask:k)) }
14682}
14683
14684/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14685///
14686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14687#[inline]
14688#[target_feature(enable = "avx512f,avx512vl")]
14689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14690#[cfg_attr(test, assert_instr(vpmovusqw))]
14691pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14692 unsafe { transmute(src:vpmovusqw256(a.as_u64x4(), src:u16x8::ZERO, mask:0b11111111)) }
14693}
14694
14695/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14696///
14697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14698#[inline]
14699#[target_feature(enable = "avx512f,avx512vl")]
14700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14701#[cfg_attr(test, assert_instr(vpmovusqw))]
14702pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14703 unsafe { transmute(src:vpmovusqw256(a.as_u64x4(), src.as_u16x8(), mask:k)) }
14704}
14705
14706/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14707///
14708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14709#[inline]
14710#[target_feature(enable = "avx512f,avx512vl")]
14711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14712#[cfg_attr(test, assert_instr(vpmovusqw))]
14713pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14714 unsafe { transmute(src:vpmovusqw256(a.as_u64x4(), src:u16x8::ZERO, mask:k)) }
14715}
14716
14717/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14718///
14719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14720#[inline]
14721#[target_feature(enable = "avx512f,avx512vl")]
14722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14723#[cfg_attr(test, assert_instr(vpmovusqw))]
14724pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14725 unsafe { transmute(src:vpmovusqw128(a.as_u64x2(), src:u16x8::ZERO, mask:0b11111111)) }
14726}
14727
14728/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14729///
14730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14731#[inline]
14732#[target_feature(enable = "avx512f,avx512vl")]
14733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14734#[cfg_attr(test, assert_instr(vpmovusqw))]
14735pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14736 unsafe { transmute(src:vpmovusqw128(a.as_u64x2(), src.as_u16x8(), mask:k)) }
14737}
14738
14739/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14740///
14741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14742#[inline]
14743#[target_feature(enable = "avx512f,avx512vl")]
14744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14745#[cfg_attr(test, assert_instr(vpmovusqw))]
14746pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14747 unsafe { transmute(src:vpmovusqw128(a.as_u64x2(), src:u16x8::ZERO, mask:k)) }
14748}
14749
14750/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14751///
14752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14753#[inline]
14754#[target_feature(enable = "avx512f")]
14755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14756#[cfg_attr(test, assert_instr(vpmovusqb))]
14757pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14758 unsafe { transmute(src:vpmovusqb(a.as_u64x8(), src:u8x16::ZERO, mask:0b11111111)) }
14759}
14760
14761/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14762///
14763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14764#[inline]
14765#[target_feature(enable = "avx512f")]
14766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14767#[cfg_attr(test, assert_instr(vpmovusqb))]
14768pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14769 unsafe { transmute(src:vpmovusqb(a.as_u64x8(), src.as_u8x16(), mask:k)) }
14770}
14771
14772/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14773///
14774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14775#[inline]
14776#[target_feature(enable = "avx512f")]
14777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14778#[cfg_attr(test, assert_instr(vpmovusqb))]
14779pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14780 unsafe { transmute(src:vpmovusqb(a.as_u64x8(), src:u8x16::ZERO, mask:k)) }
14781}
14782
14783/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14784///
14785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14786#[inline]
14787#[target_feature(enable = "avx512f,avx512vl")]
14788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14789#[cfg_attr(test, assert_instr(vpmovusqb))]
14790pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14791 unsafe { transmute(src:vpmovusqb256(a.as_u64x4(), src:u8x16::ZERO, mask:0b11111111)) }
14792}
14793
14794/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14795///
14796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14797#[inline]
14798#[target_feature(enable = "avx512f,avx512vl")]
14799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14800#[cfg_attr(test, assert_instr(vpmovusqb))]
14801pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14802 unsafe { transmute(src:vpmovusqb256(a.as_u64x4(), src.as_u8x16(), mask:k)) }
14803}
14804
14805/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14806///
14807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14808#[inline]
14809#[target_feature(enable = "avx512f,avx512vl")]
14810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14811#[cfg_attr(test, assert_instr(vpmovusqb))]
14812pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14813 unsafe { transmute(src:vpmovusqb256(a.as_u64x4(), src:u8x16::ZERO, mask:k)) }
14814}
14815
14816/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14817///
14818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14819#[inline]
14820#[target_feature(enable = "avx512f,avx512vl")]
14821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14822#[cfg_attr(test, assert_instr(vpmovusqb))]
14823pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14824 unsafe { transmute(src:vpmovusqb128(a.as_u64x2(), src:u8x16::ZERO, mask:0b11111111)) }
14825}
14826
14827/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14828///
14829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14830#[inline]
14831#[target_feature(enable = "avx512f,avx512vl")]
14832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14833#[cfg_attr(test, assert_instr(vpmovusqb))]
14834pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14835 unsafe { transmute(src:vpmovusqb128(a.as_u64x2(), src.as_u8x16(), mask:k)) }
14836}
14837
14838/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14839///
14840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14841#[inline]
14842#[target_feature(enable = "avx512f,avx512vl")]
14843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14844#[cfg_attr(test, assert_instr(vpmovusqb))]
14845pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14846 unsafe { transmute(src:vpmovusqb128(a.as_u64x2(), src:u8x16::ZERO, mask:k)) }
14847}
14848
14849/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14850///
14851/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14852/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14853/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14854/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14855/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14856/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14857///
14858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14859#[inline]
14860#[target_feature(enable = "avx512f")]
14861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14862#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14863#[rustc_legacy_const_generics(1)]
14864pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14865 unsafe {
14866 static_assert_rounding!(ROUNDING);
14867 let a: Simd = a.as_f32x16();
14868 let r: Simd = vcvtps2dq(a, src:i32x16::ZERO, mask:0b11111111_11111111, ROUNDING);
14869 transmute(src:r)
14870 }
14871}
14872
14873/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14874///
14875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14881///
14882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14883#[inline]
14884#[target_feature(enable = "avx512f")]
14885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14886#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14887#[rustc_legacy_const_generics(3)]
14888pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14889 src: __m512i,
14890 k: __mmask16,
14891 a: __m512,
14892) -> __m512i {
14893 unsafe {
14894 static_assert_rounding!(ROUNDING);
14895 let a: Simd = a.as_f32x16();
14896 let src: Simd = src.as_i32x16();
14897 let r: Simd = vcvtps2dq(a, src, mask:k, ROUNDING);
14898 transmute(src:r)
14899 }
14900}
14901
14902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14903///
14904/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14905/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14906/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14907/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14908/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14909/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14910///
14911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14912#[inline]
14913#[target_feature(enable = "avx512f")]
14914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14915#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14916#[rustc_legacy_const_generics(2)]
14917pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14918 unsafe {
14919 static_assert_rounding!(ROUNDING);
14920 let a: Simd = a.as_f32x16();
14921 let r: Simd = vcvtps2dq(a, src:i32x16::ZERO, mask:k, ROUNDING);
14922 transmute(src:r)
14923 }
14924}
14925
14926/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14927///
14928/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14929/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14930/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14931/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14932/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14933/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14934///
14935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14936#[inline]
14937#[target_feature(enable = "avx512f")]
14938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14939#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14940#[rustc_legacy_const_generics(1)]
14941pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14942 unsafe {
14943 static_assert_rounding!(ROUNDING);
14944 let a: Simd = a.as_f32x16();
14945 let r: Simd = vcvtps2udq(a, src:u32x16::ZERO, mask:0b11111111_11111111, ROUNDING);
14946 transmute(src:r)
14947 }
14948}
14949
14950/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14951///
14952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14958///
14959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14960#[inline]
14961#[target_feature(enable = "avx512f")]
14962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14963#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14964#[rustc_legacy_const_generics(3)]
14965pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14966 src: __m512i,
14967 k: __mmask16,
14968 a: __m512,
14969) -> __m512i {
14970 unsafe {
14971 static_assert_rounding!(ROUNDING);
14972 let a: Simd = a.as_f32x16();
14973 let src: Simd = src.as_u32x16();
14974 let r: Simd = vcvtps2udq(a, src, mask:k, ROUNDING);
14975 transmute(src:r)
14976 }
14977}
14978
14979/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14980///
14981/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14982/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14983/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14984/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14985/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14986/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14987///
14988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14989#[inline]
14990#[target_feature(enable = "avx512f")]
14991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14992#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14993#[rustc_legacy_const_generics(2)]
14994pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14995 unsafe {
14996 static_assert_rounding!(ROUNDING);
14997 let a: Simd = a.as_f32x16();
14998 let r: Simd = vcvtps2udq(a, src:u32x16::ZERO, mask:k, ROUNDING);
14999 transmute(src:r)
15000 }
15001}
15002
15003/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
15004/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15005///
15006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
15007#[inline]
15008#[target_feature(enable = "avx512f")]
15009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15010#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15011#[rustc_legacy_const_generics(1)]
15012pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
15013 unsafe {
15014 static_assert_sae!(SAE);
15015 let a: Simd = a.as_f32x8();
15016 let r: Simd = vcvtps2pd(a, src:f64x8::ZERO, mask:0b11111111, SAE);
15017 transmute(src:r)
15018 }
15019}
15020
15021/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15023///
15024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
15025#[inline]
15026#[target_feature(enable = "avx512f")]
15027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15028#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15029#[rustc_legacy_const_generics(3)]
15030pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
15031 unsafe {
15032 static_assert_sae!(SAE);
15033 let a: Simd = a.as_f32x8();
15034 let src: Simd = src.as_f64x8();
15035 let r: Simd = vcvtps2pd(a, src, mask:k, SAE);
15036 transmute(src:r)
15037 }
15038}
15039
15040/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15041/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15042///
15043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
15044#[inline]
15045#[target_feature(enable = "avx512f")]
15046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15047#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15048#[rustc_legacy_const_generics(2)]
15049pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
15050 unsafe {
15051 static_assert_sae!(SAE);
15052 let a: Simd = a.as_f32x8();
15053 let r: Simd = vcvtps2pd(a, src:f64x8::ZERO, mask:k, SAE);
15054 transmute(src:r)
15055 }
15056}
15057
15058/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
15059///
15060/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15061/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15062/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15063/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15064/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15065/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15066///
15067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
15068#[inline]
15069#[target_feature(enable = "avx512f")]
15070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15071#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15072#[rustc_legacy_const_generics(1)]
15073pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
15074 unsafe {
15075 static_assert_rounding!(ROUNDING);
15076 let a: Simd = a.as_f64x8();
15077 let r: Simd = vcvtpd2dq(a, src:i32x8::ZERO, mask:0b11111111, ROUNDING);
15078 transmute(src:r)
15079 }
15080}
15081
15082/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15083///
15084/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15085/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15086/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15087/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15088/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15089/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15090///
15091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
15092#[inline]
15093#[target_feature(enable = "avx512f")]
15094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15095#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15096#[rustc_legacy_const_generics(3)]
15097pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
15098 src: __m256i,
15099 k: __mmask8,
15100 a: __m512d,
15101) -> __m256i {
15102 unsafe {
15103 static_assert_rounding!(ROUNDING);
15104 let a: Simd = a.as_f64x8();
15105 let src: Simd = src.as_i32x8();
15106 let r: Simd = vcvtpd2dq(a, src, mask:k, ROUNDING);
15107 transmute(src:r)
15108 }
15109}
15110
15111/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15112///
15113/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15114/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15115/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15116/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15117/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15118/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15119///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
15121#[inline]
15122#[target_feature(enable = "avx512f")]
15123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15124#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15125#[rustc_legacy_const_generics(2)]
15126pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
15127 unsafe {
15128 static_assert_rounding!(ROUNDING);
15129 let a: Simd = a.as_f64x8();
15130 let r: Simd = vcvtpd2dq(a, src:i32x8::ZERO, mask:k, ROUNDING);
15131 transmute(src:r)
15132 }
15133}
15134
15135/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
15136///
15137/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15138/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15139/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15140/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15141/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15142/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15143///
15144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
15145#[inline]
15146#[target_feature(enable = "avx512f")]
15147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15148#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15149#[rustc_legacy_const_generics(1)]
15150pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
15151 unsafe {
15152 static_assert_rounding!(ROUNDING);
15153 let a: Simd = a.as_f64x8();
15154 let r: Simd = vcvtpd2udq(a, src:u32x8::ZERO, mask:0b11111111, ROUNDING);
15155 transmute(src:r)
15156 }
15157}
15158
15159/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15160///
15161/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15162/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15163/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15164/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15165/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15166/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15167///
15168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
15169#[inline]
15170#[target_feature(enable = "avx512f")]
15171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15172#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15173#[rustc_legacy_const_generics(3)]
15174pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
15175 src: __m256i,
15176 k: __mmask8,
15177 a: __m512d,
15178) -> __m256i {
15179 unsafe {
15180 static_assert_rounding!(ROUNDING);
15181 let a: Simd = a.as_f64x8();
15182 let src: Simd = src.as_u32x8();
15183 let r: Simd = vcvtpd2udq(a, src, mask:k, ROUNDING);
15184 transmute(src:r)
15185 }
15186}
15187
15188/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15189///
15190/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15191/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15192/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15193/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15194/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15195/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15196///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
15198#[inline]
15199#[target_feature(enable = "avx512f")]
15200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15201#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15202#[rustc_legacy_const_generics(2)]
15203pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
15204 unsafe {
15205 static_assert_rounding!(ROUNDING);
15206 let a: Simd = a.as_f64x8();
15207 let r: Simd = vcvtpd2udq(a, src:u32x8::ZERO, mask:k, ROUNDING);
15208 transmute(src:r)
15209 }
15210}
15211
15212/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15213///
15214/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15215/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15216/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15217/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15218/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15219/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15220///
15221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
15222#[inline]
15223#[target_feature(enable = "avx512f")]
15224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15225#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15226#[rustc_legacy_const_generics(1)]
15227pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
15228 unsafe {
15229 static_assert_rounding!(ROUNDING);
15230 let a: Simd = a.as_f64x8();
15231 let r: Simd = vcvtpd2ps(a, src:f32x8::ZERO, mask:0b11111111, ROUNDING);
15232 transmute(src:r)
15233 }
15234}
15235
15236/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15237///
15238/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15239/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15240/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15241/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15242/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15243/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15244///
15245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
15246#[inline]
15247#[target_feature(enable = "avx512f")]
15248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15249#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15250#[rustc_legacy_const_generics(3)]
15251pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
15252 src: __m256,
15253 k: __mmask8,
15254 a: __m512d,
15255) -> __m256 {
15256 unsafe {
15257 static_assert_rounding!(ROUNDING);
15258 let a: Simd = a.as_f64x8();
15259 let src: Simd = src.as_f32x8();
15260 let r: Simd = vcvtpd2ps(a, src, mask:k, ROUNDING);
15261 transmute(src:r)
15262 }
15263}
15264
15265/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15266///
15267/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15268/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15269/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15270/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15271/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15272/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15273///
15274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
15275#[inline]
15276#[target_feature(enable = "avx512f")]
15277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15278#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15279#[rustc_legacy_const_generics(2)]
15280pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
15281 unsafe {
15282 static_assert_rounding!(ROUNDING);
15283 let a: Simd = a.as_f64x8();
15284 let r: Simd = vcvtpd2ps(a, src:f32x8::ZERO, mask:k, ROUNDING);
15285 transmute(src:r)
15286 }
15287}
15288
15289/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15290///
15291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15297///
15298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
15299#[inline]
15300#[target_feature(enable = "avx512f")]
15301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15302#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15303#[rustc_legacy_const_generics(1)]
15304pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
15305 unsafe {
15306 static_assert_rounding!(ROUNDING);
15307 let a: Simd = a.as_i32x16();
15308 let r: Simd = vcvtdq2ps(a, ROUNDING);
15309 transmute(src:r)
15310 }
15311}
15312
15313/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15314///
15315/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15316/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15317/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15318/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15319/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15320/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15321///
15322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
15323#[inline]
15324#[target_feature(enable = "avx512f")]
15325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15326#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15327#[rustc_legacy_const_generics(3)]
15328pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
15329 src: __m512,
15330 k: __mmask16,
15331 a: __m512i,
15332) -> __m512 {
15333 unsafe {
15334 static_assert_rounding!(ROUNDING);
15335 let a: Simd = a.as_i32x16();
15336 let r: Simd = vcvtdq2ps(a, ROUNDING);
15337 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
15338 }
15339}
15340
15341/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15342///
15343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15349///
15350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
15351#[inline]
15352#[target_feature(enable = "avx512f")]
15353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15354#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15355#[rustc_legacy_const_generics(2)]
15356pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15357 unsafe {
15358 static_assert_rounding!(ROUNDING);
15359 let a: Simd = a.as_i32x16();
15360 let r: Simd = vcvtdq2ps(a, ROUNDING);
15361 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
15362 }
15363}
15364
15365/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15366///
15367/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15368/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15369/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15370/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15371/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15372/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15373///
15374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
15375#[inline]
15376#[target_feature(enable = "avx512f")]
15377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15378#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15379#[rustc_legacy_const_generics(1)]
15380pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
15381 unsafe {
15382 static_assert_rounding!(ROUNDING);
15383 let a: Simd = a.as_u32x16();
15384 let r: Simd = vcvtudq2ps(a, ROUNDING);
15385 transmute(src:r)
15386 }
15387}
15388
15389/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15390///
15391/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15392/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15393/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15394/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15395/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15396/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15397///
15398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15399#[inline]
15400#[target_feature(enable = "avx512f")]
15401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15402#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15403#[rustc_legacy_const_generics(3)]
15404pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15405 src: __m512,
15406 k: __mmask16,
15407 a: __m512i,
15408) -> __m512 {
15409 unsafe {
15410 static_assert_rounding!(ROUNDING);
15411 let a: Simd = a.as_u32x16();
15412 let r: Simd = vcvtudq2ps(a, ROUNDING);
15413 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
15414 }
15415}
15416
15417/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15418///
15419/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15420/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15421/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15422/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15423/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15424/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15425///
15426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15427#[inline]
15428#[target_feature(enable = "avx512f")]
15429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15430#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15431#[rustc_legacy_const_generics(2)]
15432pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15433 unsafe {
15434 static_assert_rounding!(ROUNDING);
15435 let a: Simd = a.as_u32x16();
15436 let r: Simd = vcvtudq2ps(a, ROUNDING);
15437 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
15438 }
15439}
15440
15441/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15443/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15444/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15445/// * [`_MM_FROUND_TO_POS_INF`] // round up
15446/// * [`_MM_FROUND_TO_ZERO`] // truncate
15447/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15452/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15453///
15454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
15455#[inline]
15456#[target_feature(enable = "avx512f")]
15457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15458#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15459#[rustc_legacy_const_generics(1)]
15460pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15461 unsafe {
15462 static_assert_extended_rounding!(ROUNDING);
15463 let a: Simd = a.as_f32x16();
15464 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:0b11111111_11111111);
15465 transmute(src:r)
15466 }
15467}
15468
15469/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15471/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15472/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15473/// * [`_MM_FROUND_TO_POS_INF`] // round up
15474/// * [`_MM_FROUND_TO_ZERO`] // truncate
15475/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15476/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15477/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15478/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15479/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15480/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15481///
15482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15483#[inline]
15484#[target_feature(enable = "avx512f")]
15485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15486#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15487#[rustc_legacy_const_generics(3)]
15488pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
15489 src: __m256i,
15490 k: __mmask16,
15491 a: __m512,
15492) -> __m256i {
15493 unsafe {
15494 static_assert_extended_rounding!(ROUNDING);
15495 let a: Simd = a.as_f32x16();
15496 let src: Simd = src.as_i16x16();
15497 let r: Simd = vcvtps2ph(a, ROUNDING, src, mask:k);
15498 transmute(src:r)
15499 }
15500}
15501
15502/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15503/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15504/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15505/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15506/// * [`_MM_FROUND_TO_POS_INF`] // round up
15507/// * [`_MM_FROUND_TO_ZERO`] // truncate
15508/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15509/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15510/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15511/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15512/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15513/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15514///
15515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15516#[inline]
15517#[target_feature(enable = "avx512f")]
15518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15519#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15520#[rustc_legacy_const_generics(2)]
15521pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15522 unsafe {
15523 static_assert_extended_rounding!(ROUNDING);
15524 let a: Simd = a.as_f32x16();
15525 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:k);
15526 transmute(src:r)
15527 }
15528}
15529
15530/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15531/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:
15532/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15533/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15534/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15535/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15536/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15537///
15538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
15539#[inline]
15540#[target_feature(enable = "avx512f,avx512vl")]
15541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15542#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15543#[rustc_legacy_const_generics(3)]
15544pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15545 src: __m128i,
15546 k: __mmask8,
15547 a: __m256,
15548) -> __m128i {
15549 unsafe {
15550 static_assert_uimm_bits!(IMM8, 8);
15551 let a: Simd = a.as_f32x8();
15552 let src: Simd = src.as_i16x8();
15553 let r: Simd = vcvtps2ph256(a, IMM8, src, mask:k);
15554 transmute(src:r)
15555 }
15556}
15557
15558/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15559/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15565///
15566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15567#[inline]
15568#[target_feature(enable = "avx512f,avx512vl")]
15569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15570#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15571#[rustc_legacy_const_generics(2)]
15572pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15573 unsafe {
15574 static_assert_uimm_bits!(IMM8, 8);
15575 let a: Simd = a.as_f32x8();
15576 let r: Simd = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15577 transmute(src:r)
15578 }
15579}
15580
15581/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15582/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15583/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15584/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15585/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15586/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15587/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15588///
15589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15590#[inline]
15591#[target_feature(enable = "avx512f,avx512vl")]
15592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15593#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15594#[rustc_legacy_const_generics(3)]
15595pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15596 unsafe {
15597 static_assert_uimm_bits!(IMM8, 8);
15598 let a: Simd = a.as_f32x4();
15599 let src: Simd = src.as_i16x8();
15600 let r: Simd = vcvtps2ph128(a, IMM8, src, mask:k);
15601 transmute(src:r)
15602 }
15603}
15604
15605/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15606/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15607/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15608/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15609/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15610/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15611/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15612///
15613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15614#[inline]
15615#[target_feature(enable = "avx512f,avx512vl")]
15616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15617#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15618#[rustc_legacy_const_generics(2)]
15619pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15620 unsafe {
15621 static_assert_uimm_bits!(IMM8, 8);
15622 let a: Simd = a.as_f32x4();
15623 let r: Simd = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15624 transmute(src:r)
15625 }
15626}
15627
15628/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15629/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15630/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15631/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15632/// * [`_MM_FROUND_TO_POS_INF`] // round up
15633/// * [`_MM_FROUND_TO_ZERO`] // truncate
15634/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15635/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15636/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15637/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15638/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15639/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15640///
15641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15642#[inline]
15643#[target_feature(enable = "avx512f")]
15644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15645#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15646#[rustc_legacy_const_generics(1)]
15647pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15648 unsafe {
15649 static_assert_extended_rounding!(ROUNDING);
15650 let a: Simd = a.as_f32x16();
15651 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:0b11111111_11111111);
15652 transmute(src:r)
15653 }
15654}
15655
15656/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15658/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15659/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15660/// * [`_MM_FROUND_TO_POS_INF`] // round up
15661/// * [`_MM_FROUND_TO_ZERO`] // truncate
15662/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15663/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15664/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15665/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15666/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15667/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15668///
15669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15670#[inline]
15671#[target_feature(enable = "avx512f")]
15672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15673#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15674#[rustc_legacy_const_generics(3)]
15675pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15676 unsafe {
15677 static_assert_extended_rounding!(ROUNDING);
15678 let a: Simd = a.as_f32x16();
15679 let src: Simd = src.as_i16x16();
15680 let r: Simd = vcvtps2ph(a, ROUNDING, src, mask:k);
15681 transmute(src:r)
15682 }
15683}
15684
15685/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15686/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15687/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15688/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15689/// * [`_MM_FROUND_TO_POS_INF`] // round up
15690/// * [`_MM_FROUND_TO_ZERO`] // truncate
15691/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15696/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15697///
15698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15699#[inline]
15700#[target_feature(enable = "avx512f")]
15701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15702#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15703#[rustc_legacy_const_generics(2)]
15704pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15705 unsafe {
15706 static_assert_extended_rounding!(ROUNDING);
15707 let a: Simd = a.as_f32x16();
15708 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:k);
15709 transmute(src:r)
15710 }
15711}
15712
15713/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15714/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15715/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15716/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15717/// * [`_MM_FROUND_TO_POS_INF`] : round up
15718/// * [`_MM_FROUND_TO_ZERO`] : truncate
15719/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15720///
15721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15722#[inline]
15723#[target_feature(enable = "avx512f,avx512vl")]
15724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15725#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15726#[rustc_legacy_const_generics(3)]
15727pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15728 unsafe {
15729 static_assert_uimm_bits!(IMM8, 8);
15730 let a: Simd = a.as_f32x8();
15731 let src: Simd = src.as_i16x8();
15732 let r: Simd = vcvtps2ph256(a, IMM8, src, mask:k);
15733 transmute(src:r)
15734 }
15735}
15736
15737/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15738/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15739/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15740/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15741/// * [`_MM_FROUND_TO_POS_INF`] : round up
15742/// * [`_MM_FROUND_TO_ZERO`] : truncate
15743/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15744///
15745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15746#[inline]
15747#[target_feature(enable = "avx512f,avx512vl")]
15748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15749#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15750#[rustc_legacy_const_generics(2)]
15751pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15752 unsafe {
15753 static_assert_uimm_bits!(IMM8, 8);
15754 let a: Simd = a.as_f32x8();
15755 let r: Simd = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15756 transmute(src:r)
15757 }
15758}
15759
15760/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15761/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15762/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15763/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15764/// * [`_MM_FROUND_TO_POS_INF`] : round up
15765/// * [`_MM_FROUND_TO_ZERO`] : truncate
15766/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15767///
15768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15769#[inline]
15770#[target_feature(enable = "avx512f,avx512vl")]
15771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15772#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15773#[rustc_legacy_const_generics(3)]
15774pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15775 unsafe {
15776 static_assert_uimm_bits!(IMM8, 8);
15777 let a: Simd = a.as_f32x4();
15778 let src: Simd = src.as_i16x8();
15779 let r: Simd = vcvtps2ph128(a, IMM8, src, mask:k);
15780 transmute(src:r)
15781 }
15782}
15783
15784/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15785/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15786/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15787/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15788/// * [`_MM_FROUND_TO_POS_INF`] : round up
15789/// * [`_MM_FROUND_TO_ZERO`] : truncate
15790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15791///
15792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15793#[inline]
15794#[target_feature(enable = "avx512f,avx512vl")]
15795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15796#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15797#[rustc_legacy_const_generics(2)]
15798pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15799 unsafe {
15800 static_assert_uimm_bits!(IMM8, 8);
15801 let a: Simd = a.as_f32x4();
15802 let r: Simd = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15803 transmute(src:r)
15804 }
15805}
15806
15807/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15809///
15810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
15811#[inline]
15812#[target_feature(enable = "avx512f")]
15813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15814#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15815#[rustc_legacy_const_generics(1)]
15816pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15817 unsafe {
15818 static_assert_sae!(SAE);
15819 let a: Simd = a.as_i16x16();
15820 let r: Simd = vcvtph2ps(a, src:f32x16::ZERO, mask:0b11111111_11111111, SAE);
15821 transmute(src:r)
15822 }
15823}
15824
15825/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15826/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15827///
15828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15829#[inline]
15830#[target_feature(enable = "avx512f")]
15831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15832#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15833#[rustc_legacy_const_generics(3)]
15834pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15835 unsafe {
15836 static_assert_sae!(SAE);
15837 let a: Simd = a.as_i16x16();
15838 let src: Simd = src.as_f32x16();
15839 let r: Simd = vcvtph2ps(a, src, mask:k, SAE);
15840 transmute(src:r)
15841 }
15842}
15843
15844/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15846///
15847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15848#[inline]
15849#[target_feature(enable = "avx512f")]
15850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15851#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15852#[rustc_legacy_const_generics(2)]
15853pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15854 unsafe {
15855 static_assert_sae!(SAE);
15856 let a: Simd = a.as_i16x16();
15857 let r: Simd = vcvtph2ps(a, src:f32x16::ZERO, mask:k, SAE);
15858 transmute(src:r)
15859 }
15860}
15861
15862/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15863///
15864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15865#[inline]
15866#[target_feature(enable = "avx512f")]
15867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15868#[cfg_attr(test, assert_instr(vcvtph2ps))]
15869pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15870 unsafe {
15871 transmute(src:vcvtph2ps(
15872 a.as_i16x16(),
15873 src:f32x16::ZERO,
15874 mask:0b11111111_11111111,
15875 _MM_FROUND_NO_EXC,
15876 ))
15877 }
15878}
15879
15880/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15881///
15882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15883#[inline]
15884#[target_feature(enable = "avx512f")]
15885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15886#[cfg_attr(test, assert_instr(vcvtph2ps))]
15887pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15888 unsafe {
15889 transmute(src:vcvtph2ps(
15890 a.as_i16x16(),
15891 src.as_f32x16(),
15892 mask:k,
15893 _MM_FROUND_NO_EXC,
15894 ))
15895 }
15896}
15897
15898/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15899///
15900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15901#[inline]
15902#[target_feature(enable = "avx512f")]
15903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15904#[cfg_attr(test, assert_instr(vcvtph2ps))]
15905pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15906 unsafe { transmute(src:vcvtph2ps(a.as_i16x16(), src:f32x16::ZERO, mask:k, _MM_FROUND_NO_EXC)) }
15907}
15908
15909/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15910///
15911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15912#[inline]
15913#[target_feature(enable = "avx512f,avx512vl")]
15914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15915#[cfg_attr(test, assert_instr(vcvtph2ps))]
15916pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15917 unsafe {
15918 let convert: __m256 = _mm256_cvtph_ps(a);
15919 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:src.as_f32x8()))
15920 }
15921}
15922
15923/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15924///
15925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15926#[inline]
15927#[target_feature(enable = "avx512f,avx512vl")]
15928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15929#[cfg_attr(test, assert_instr(vcvtph2ps))]
15930pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15931 unsafe {
15932 let convert: __m256 = _mm256_cvtph_ps(a);
15933 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:f32x8::ZERO))
15934 }
15935}
15936
15937/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15943#[cfg_attr(test, assert_instr(vcvtph2ps))]
15944pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15945 unsafe {
15946 let convert: __m128 = _mm_cvtph_ps(a);
15947 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
15948 }
15949}
15950
15951/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15952///
15953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15954#[inline]
15955#[target_feature(enable = "avx512f,avx512vl")]
15956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15957#[cfg_attr(test, assert_instr(vcvtph2ps))]
15958pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15959 unsafe {
15960 let convert: __m128 = _mm_cvtph_ps(a);
15961 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
15962 }
15963}
15964
15965/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15972#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15973#[rustc_legacy_const_generics(1)]
15974pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15975 unsafe {
15976 static_assert_sae!(SAE);
15977 let a: Simd = a.as_f32x16();
15978 let r: Simd = vcvttps2dq(a, src:i32x16::ZERO, mask:0b11111111_11111111, SAE);
15979 transmute(src:r)
15980 }
15981}
15982
15983/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15984/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15990#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15991#[rustc_legacy_const_generics(3)]
15992pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15993 src: __m512i,
15994 k: __mmask16,
15995 a: __m512,
15996) -> __m512i {
15997 unsafe {
15998 static_assert_sae!(SAE);
15999 let a: Simd = a.as_f32x16();
16000 let src: Simd = src.as_i32x16();
16001 let r: Simd = vcvttps2dq(a, src, mask:k, SAE);
16002 transmute(src:r)
16003 }
16004}
16005
16006/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16008///
16009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
16010#[inline]
16011#[target_feature(enable = "avx512f")]
16012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16013#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
16014#[rustc_legacy_const_generics(2)]
16015pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16016 unsafe {
16017 static_assert_sae!(SAE);
16018 let a: Simd = a.as_f32x16();
16019 let r: Simd = vcvttps2dq(a, src:i32x16::ZERO, mask:k, SAE);
16020 transmute(src:r)
16021 }
16022}
16023
16024/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16026///
16027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
16028#[inline]
16029#[target_feature(enable = "avx512f")]
16030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16031#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16032#[rustc_legacy_const_generics(1)]
16033pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
16034 unsafe {
16035 static_assert_sae!(SAE);
16036 let a: Simd = a.as_f32x16();
16037 let r: Simd = vcvttps2udq(a, src:u32x16::ZERO, mask:0b11111111_11111111, SAE);
16038 transmute(src:r)
16039 }
16040}
16041
16042/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16043/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16044///
16045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
16046#[inline]
16047#[target_feature(enable = "avx512f")]
16048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16049#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16050#[rustc_legacy_const_generics(3)]
16051pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
16052 src: __m512i,
16053 k: __mmask16,
16054 a: __m512,
16055) -> __m512i {
16056 unsafe {
16057 static_assert_sae!(SAE);
16058 let a: Simd = a.as_f32x16();
16059 let src: Simd = src.as_u32x16();
16060 let r: Simd = vcvttps2udq(a, src, mask:k, SAE);
16061 transmute(src:r)
16062 }
16063}
16064
16065/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16066/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16067///
16068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
16069#[inline]
16070#[target_feature(enable = "avx512f")]
16071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16072#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16073#[rustc_legacy_const_generics(2)]
16074pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16075 unsafe {
16076 static_assert_sae!(SAE);
16077 let a: Simd = a.as_f32x16();
16078 let r: Simd = vcvttps2udq(a, src:u32x16::ZERO, mask:k, SAE);
16079 transmute(src:r)
16080 }
16081}
16082
16083/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
16084/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16085///
16086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
16087#[inline]
16088#[target_feature(enable = "avx512f")]
16089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16090#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16091#[rustc_legacy_const_generics(1)]
16092pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
16093 unsafe {
16094 static_assert_sae!(SAE);
16095 let a: Simd = a.as_f64x8();
16096 let r: Simd = vcvttpd2dq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
16097 transmute(src:r)
16098 }
16099}
16100
16101/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16102/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
16105#[inline]
16106#[target_feature(enable = "avx512f")]
16107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16109#[rustc_legacy_const_generics(3)]
16110pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
16111 src: __m256i,
16112 k: __mmask8,
16113 a: __m512d,
16114) -> __m256i {
16115 unsafe {
16116 static_assert_sae!(SAE);
16117 let a: Simd = a.as_f64x8();
16118 let src: Simd = src.as_i32x8();
16119 let r: Simd = vcvttpd2dq(a, src, mask:k, SAE);
16120 transmute(src:r)
16121 }
16122}
16123
16124/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16125/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16126///
16127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
16128#[inline]
16129#[target_feature(enable = "avx512f")]
16130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16131#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16132#[rustc_legacy_const_generics(2)]
16133pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16134 unsafe {
16135 static_assert_sae!(SAE);
16136 let a: Simd = a.as_f64x8();
16137 let r: Simd = vcvttpd2dq(a, src:i32x8::ZERO, mask:k, SAE);
16138 transmute(src:r)
16139 }
16140}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16144///
16145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
16146#[inline]
16147#[target_feature(enable = "avx512f")]
16148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16149#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16150#[rustc_legacy_const_generics(1)]
16151pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
16152 unsafe {
16153 static_assert_sae!(SAE);
16154 let a: Simd = a.as_f64x8();
16155 let r: Simd = vcvttpd2udq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
16156 transmute(src:r)
16157 }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16162///
16163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
16164#[inline]
16165#[target_feature(enable = "avx512f")]
16166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16167#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16168#[rustc_legacy_const_generics(3)]
16169pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
16170 src: __m256i,
16171 k: __mmask8,
16172 a: __m512d,
16173) -> __m256i {
16174 unsafe {
16175 static_assert_sae!(SAE);
16176 let a: Simd = a.as_f64x8();
16177 let src: Simd = src.as_i32x8();
16178 let r: Simd = vcvttpd2udq(a, src, mask:k, SAE);
16179 transmute(src:r)
16180 }
16181}
16182
16183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16184///
16185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
16186#[inline]
16187#[target_feature(enable = "avx512f")]
16188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16189#[cfg_attr(test, assert_instr(vcvttps2dq))]
16190pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
16191 unsafe {
16192 transmute(src:vcvttps2dq(
16193 a.as_f32x16(),
16194 src:i32x16::ZERO,
16195 mask:0b11111111_11111111,
16196 _MM_FROUND_CUR_DIRECTION,
16197 ))
16198 }
16199}
16200
16201/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16202///
16203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
16204#[inline]
16205#[target_feature(enable = "avx512f")]
16206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16207#[cfg_attr(test, assert_instr(vcvttps2dq))]
16208pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
16209 unsafe {
16210 transmute(src:vcvttps2dq(
16211 a.as_f32x16(),
16212 src.as_i32x16(),
16213 mask:k,
16214 _MM_FROUND_CUR_DIRECTION,
16215 ))
16216 }
16217}
16218
16219/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16220///
16221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
16222#[inline]
16223#[target_feature(enable = "avx512f")]
16224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16225#[cfg_attr(test, assert_instr(vcvttps2dq))]
16226pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
16227 unsafe {
16228 transmute(src:vcvttps2dq(
16229 a.as_f32x16(),
16230 src:i32x16::ZERO,
16231 mask:k,
16232 _MM_FROUND_CUR_DIRECTION,
16233 ))
16234 }
16235}
16236
16237/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16238///
16239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
16240#[inline]
16241#[target_feature(enable = "avx512f,avx512vl")]
16242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16243#[cfg_attr(test, assert_instr(vcvttps2dq))]
16244pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
16245 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), mask:k)) }
16246}
16247
16248/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16249///
16250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
16251#[inline]
16252#[target_feature(enable = "avx512f,avx512vl")]
16253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16254#[cfg_attr(test, assert_instr(vcvttps2dq))]
16255pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
16256 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src:i32x8::ZERO, mask:k)) }
16257}
16258
16259/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16260///
16261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
16262#[inline]
16263#[target_feature(enable = "avx512f,avx512vl")]
16264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16265#[cfg_attr(test, assert_instr(vcvttps2dq))]
16266pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
16267 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), mask:k)) }
16268}
16269
16270/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16271///
16272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
16273#[inline]
16274#[target_feature(enable = "avx512f,avx512vl")]
16275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16276#[cfg_attr(test, assert_instr(vcvttps2dq))]
16277pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
16278 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src:i32x4::ZERO, mask:k)) }
16279}
16280
16281/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16282///
16283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
16284#[inline]
16285#[target_feature(enable = "avx512f")]
16286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16287#[cfg_attr(test, assert_instr(vcvttps2udq))]
16288pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
16289 unsafe {
16290 transmute(src:vcvttps2udq(
16291 a.as_f32x16(),
16292 src:u32x16::ZERO,
16293 mask:0b11111111_11111111,
16294 _MM_FROUND_CUR_DIRECTION,
16295 ))
16296 }
16297}
16298
16299/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16300///
16301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
16302#[inline]
16303#[target_feature(enable = "avx512f")]
16304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16305#[cfg_attr(test, assert_instr(vcvttps2udq))]
16306pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
16307 unsafe {
16308 transmute(src:vcvttps2udq(
16309 a.as_f32x16(),
16310 src.as_u32x16(),
16311 mask:k,
16312 _MM_FROUND_CUR_DIRECTION,
16313 ))
16314 }
16315}
16316
16317/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16318///
16319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
16320#[inline]
16321#[target_feature(enable = "avx512f")]
16322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16323#[cfg_attr(test, assert_instr(vcvttps2udq))]
16324pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
16325 unsafe {
16326 transmute(src:vcvttps2udq(
16327 a.as_f32x16(),
16328 src:u32x16::ZERO,
16329 mask:k,
16330 _MM_FROUND_CUR_DIRECTION,
16331 ))
16332 }
16333}
16334
16335/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16336///
16337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
16338#[inline]
16339#[target_feature(enable = "avx512f,avx512vl")]
16340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16341#[cfg_attr(test, assert_instr(vcvttps2udq))]
16342pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
16343 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:0b11111111)) }
16344}
16345
16346/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16347///
16348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
16349#[inline]
16350#[target_feature(enable = "avx512f,avx512vl")]
16351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16352#[cfg_attr(test, assert_instr(vcvttps2udq))]
16353pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
16354 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), mask:k)) }
16355}
16356
16357/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16358///
16359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
16360#[inline]
16361#[target_feature(enable = "avx512f,avx512vl")]
16362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16363#[cfg_attr(test, assert_instr(vcvttps2udq))]
16364pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
16365 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:k)) }
16366}
16367
16368/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16369///
16370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
16371#[inline]
16372#[target_feature(enable = "avx512f,avx512vl")]
16373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16374#[cfg_attr(test, assert_instr(vcvttps2udq))]
16375pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
16376 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:0b11111111)) }
16377}
16378
16379/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16380///
16381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
16382#[inline]
16383#[target_feature(enable = "avx512f,avx512vl")]
16384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16385#[cfg_attr(test, assert_instr(vcvttps2udq))]
16386pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
16387 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
16388}
16389
16390/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16391///
16392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
16393#[inline]
16394#[target_feature(enable = "avx512f,avx512vl")]
16395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16396#[cfg_attr(test, assert_instr(vcvttps2udq))]
16397pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
16398 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
16399}
16400
16401/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16402/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16403///
16404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
16405#[inline]
16406#[target_feature(enable = "avx512f")]
16407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16408#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16409#[rustc_legacy_const_generics(2)]
16410pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16411 unsafe {
16412 static_assert_sae!(SAE);
16413 let a: Simd = a.as_f64x8();
16414 let r: Simd = vcvttpd2udq(a, src:i32x8::ZERO, mask:k, SAE);
16415 transmute(src:r)
16416 }
16417}
16418
16419/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16425#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16426pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16427 unsafe {
16428 transmute(src:vcvttpd2dq(
16429 a.as_f64x8(),
16430 src:i32x8::ZERO,
16431 mask:0b11111111,
16432 _MM_FROUND_CUR_DIRECTION,
16433 ))
16434 }
16435}
16436
16437/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16438///
16439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16440#[inline]
16441#[target_feature(enable = "avx512f")]
16442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16443#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16444pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16445 unsafe {
16446 transmute(src:vcvttpd2dq(
16447 a.as_f64x8(),
16448 src.as_i32x8(),
16449 mask:k,
16450 _MM_FROUND_CUR_DIRECTION,
16451 ))
16452 }
16453}
16454
16455/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16456///
16457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16458#[inline]
16459#[target_feature(enable = "avx512f")]
16460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16461#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16462pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16463 unsafe {
16464 transmute(src:vcvttpd2dq(
16465 a.as_f64x8(),
16466 src:i32x8::ZERO,
16467 mask:k,
16468 _MM_FROUND_CUR_DIRECTION,
16469 ))
16470 }
16471}
16472
16473/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16474///
16475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16476#[inline]
16477#[target_feature(enable = "avx512f,avx512vl")]
16478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16479#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16480pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16481 unsafe { transmute(src:vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), mask:k)) }
16482}
16483
16484/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16485///
16486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16487#[inline]
16488#[target_feature(enable = "avx512f,avx512vl")]
16489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16490#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16491pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16492 unsafe { transmute(src:vcvttpd2dq256(a.as_f64x4(), src:i32x4::ZERO, mask:k)) }
16493}
16494
16495/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16496///
16497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16498#[inline]
16499#[target_feature(enable = "avx512f,avx512vl")]
16500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16501#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16502pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16503 unsafe { transmute(src:vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), mask:k)) }
16504}
16505
16506/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16507///
16508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16509#[inline]
16510#[target_feature(enable = "avx512f,avx512vl")]
16511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16512#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16513pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16514 unsafe { transmute(src:vcvttpd2dq128(a.as_f64x2(), src:i32x4::ZERO, mask:k)) }
16515}
16516
16517/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16518///
16519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16520#[inline]
16521#[target_feature(enable = "avx512f")]
16522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16523#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16524pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16525 unsafe {
16526 transmute(src:vcvttpd2udq(
16527 a.as_f64x8(),
16528 src:i32x8::ZERO,
16529 mask:0b11111111,
16530 _MM_FROUND_CUR_DIRECTION,
16531 ))
16532 }
16533}
16534
16535/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16536///
16537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16538#[inline]
16539#[target_feature(enable = "avx512f")]
16540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16541#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16542pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16543 unsafe {
16544 transmute(src:vcvttpd2udq(
16545 a.as_f64x8(),
16546 src.as_i32x8(),
16547 mask:k,
16548 _MM_FROUND_CUR_DIRECTION,
16549 ))
16550 }
16551}
16552
16553/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16554///
16555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16556#[inline]
16557#[target_feature(enable = "avx512f")]
16558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16559#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16560pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16561 unsafe {
16562 transmute(src:vcvttpd2udq(
16563 a.as_f64x8(),
16564 src:i32x8::ZERO,
16565 mask:k,
16566 _MM_FROUND_CUR_DIRECTION,
16567 ))
16568 }
16569}
16570
16571/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16572///
16573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16574#[inline]
16575#[target_feature(enable = "avx512f,avx512vl")]
16576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16577#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16578pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
16579 unsafe { transmute(src:vcvttpd2udq256(a.as_f64x4(), src:i32x4::ZERO, mask:0b11111111)) }
16580}
16581
16582/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16583///
16584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16585#[inline]
16586#[target_feature(enable = "avx512f,avx512vl")]
16587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16588#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16589pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16590 unsafe { transmute(src:vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), mask:k)) }
16591}
16592
16593/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16594///
16595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16596#[inline]
16597#[target_feature(enable = "avx512f,avx512vl")]
16598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16599#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16600pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
16601 unsafe { transmute(src:vcvttpd2udq256(a.as_f64x4(), src:i32x4::ZERO, mask:k)) }
16602}
16603
16604/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16605///
16606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16607#[inline]
16608#[target_feature(enable = "avx512f,avx512vl")]
16609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16610#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16611pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
16612 unsafe { transmute(src:vcvttpd2udq128(a.as_f64x2(), src:i32x4::ZERO, mask:0b11111111)) }
16613}
16614
16615/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16616///
16617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16618#[inline]
16619#[target_feature(enable = "avx512f,avx512vl")]
16620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16621#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16622pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16623 unsafe { transmute(src:vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), mask:k)) }
16624}
16625
16626/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16627///
16628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16629#[inline]
16630#[target_feature(enable = "avx512f,avx512vl")]
16631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16632#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16633pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
16634 unsafe { transmute(src:vcvttpd2udq128(a.as_f64x2(), src:i32x4::ZERO, mask:k)) }
16635}
16636
16637/// Returns vector of type `__m512d` with all elements set to zero.
16638///
16639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16640#[inline]
16641#[target_feature(enable = "avx512f")]
16642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16643#[cfg_attr(test, assert_instr(vxorps))]
16644#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16645pub const fn _mm512_setzero_pd() -> __m512d {
16646 // All-0 is a properly initialized __m512d
16647 unsafe { const { mem::zeroed() } }
16648}
16649
16650/// Returns vector of type `__m512` with all elements set to zero.
16651///
16652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16653#[inline]
16654#[target_feature(enable = "avx512f")]
16655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16656#[cfg_attr(test, assert_instr(vxorps))]
16657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16658pub const fn _mm512_setzero_ps() -> __m512 {
16659 // All-0 is a properly initialized __m512
16660 unsafe { const { mem::zeroed() } }
16661}
16662
16663/// Return vector of type `__m512` with all elements set to zero.
16664///
16665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16666#[inline]
16667#[target_feature(enable = "avx512f")]
16668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16669#[cfg_attr(test, assert_instr(vxorps))]
16670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16671pub const fn _mm512_setzero() -> __m512 {
16672 // All-0 is a properly initialized __m512
16673 unsafe { const { mem::zeroed() } }
16674}
16675
16676/// Returns vector of type `__m512i` with all elements set to zero.
16677///
16678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16679#[inline]
16680#[target_feature(enable = "avx512f")]
16681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16682#[cfg_attr(test, assert_instr(vxorps))]
16683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16684pub const fn _mm512_setzero_si512() -> __m512i {
16685 // All-0 is a properly initialized __m512i
16686 unsafe { const { mem::zeroed() } }
16687}
16688
16689/// Return vector of type `__m512i` with all elements set to zero.
16690///
16691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16692#[inline]
16693#[target_feature(enable = "avx512f")]
16694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16695#[cfg_attr(test, assert_instr(vxorps))]
16696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16697pub const fn _mm512_setzero_epi32() -> __m512i {
16698 // All-0 is a properly initialized __m512i
16699 unsafe { const { mem::zeroed() } }
16700}
16701
16702/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16703/// order.
16704///
16705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
16706#[inline]
16707#[target_feature(enable = "avx512f")]
16708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16710pub const fn _mm512_setr_epi32(
16711 e15: i32,
16712 e14: i32,
16713 e13: i32,
16714 e12: i32,
16715 e11: i32,
16716 e10: i32,
16717 e9: i32,
16718 e8: i32,
16719 e7: i32,
16720 e6: i32,
16721 e5: i32,
16722 e4: i32,
16723 e3: i32,
16724 e2: i32,
16725 e1: i32,
16726 e0: i32,
16727) -> __m512i {
16728 unsafe {
16729 let r: Simd = i32x16::new(
16730 x0:e15, x1:e14, x2:e13, x3:e12, x4:e11, x5:e10, x6:e9, x7:e8, x8:e7, x9:e6, x10:e5, x11:e4, x12:e3, x13:e2, x14:e1, x15:e0,
16731 );
16732 transmute(src:r)
16733 }
16734}
16735
16736/// Set packed 8-bit integers in dst with the supplied values.
16737///
16738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16739#[inline]
16740#[target_feature(enable = "avx512f")]
16741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16743pub const fn _mm512_set_epi8(
16744 e63: i8,
16745 e62: i8,
16746 e61: i8,
16747 e60: i8,
16748 e59: i8,
16749 e58: i8,
16750 e57: i8,
16751 e56: i8,
16752 e55: i8,
16753 e54: i8,
16754 e53: i8,
16755 e52: i8,
16756 e51: i8,
16757 e50: i8,
16758 e49: i8,
16759 e48: i8,
16760 e47: i8,
16761 e46: i8,
16762 e45: i8,
16763 e44: i8,
16764 e43: i8,
16765 e42: i8,
16766 e41: i8,
16767 e40: i8,
16768 e39: i8,
16769 e38: i8,
16770 e37: i8,
16771 e36: i8,
16772 e35: i8,
16773 e34: i8,
16774 e33: i8,
16775 e32: i8,
16776 e31: i8,
16777 e30: i8,
16778 e29: i8,
16779 e28: i8,
16780 e27: i8,
16781 e26: i8,
16782 e25: i8,
16783 e24: i8,
16784 e23: i8,
16785 e22: i8,
16786 e21: i8,
16787 e20: i8,
16788 e19: i8,
16789 e18: i8,
16790 e17: i8,
16791 e16: i8,
16792 e15: i8,
16793 e14: i8,
16794 e13: i8,
16795 e12: i8,
16796 e11: i8,
16797 e10: i8,
16798 e9: i8,
16799 e8: i8,
16800 e7: i8,
16801 e6: i8,
16802 e5: i8,
16803 e4: i8,
16804 e3: i8,
16805 e2: i8,
16806 e1: i8,
16807 e0: i8,
16808) -> __m512i {
16809 unsafe {
16810 let r: Simd = i8x64::new(
16811 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15, x16:e16, x17:e17, x18:e18,
16812 x19:e19, x20:e20, x21:e21, x22:e22, x23:e23, x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31, x32:e32, x33:e33, x34:e34, x35:e35,
16813 x36:e36, x37:e37, x38:e38, x39:e39, x40:e40, x41:e41, x42:e42, x43:e43, x44:e44, x45:e45, x46:e46, x47:e47, x48:e48, x49:e49, x50:e50, x51:e51, x52:e52,
16814 x53:e53, x54:e54, x55:e55, x56:e56, x57:e57, x58:e58, x59:e59, x60:e60, x61:e61, x62:e62, x63:e63,
16815 );
16816 transmute(src:r)
16817 }
16818}
16819
16820/// Set packed 16-bit integers in dst with the supplied values.
16821///
16822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16823#[inline]
16824#[target_feature(enable = "avx512f")]
16825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16827pub const fn _mm512_set_epi16(
16828 e31: i16,
16829 e30: i16,
16830 e29: i16,
16831 e28: i16,
16832 e27: i16,
16833 e26: i16,
16834 e25: i16,
16835 e24: i16,
16836 e23: i16,
16837 e22: i16,
16838 e21: i16,
16839 e20: i16,
16840 e19: i16,
16841 e18: i16,
16842 e17: i16,
16843 e16: i16,
16844 e15: i16,
16845 e14: i16,
16846 e13: i16,
16847 e12: i16,
16848 e11: i16,
16849 e10: i16,
16850 e9: i16,
16851 e8: i16,
16852 e7: i16,
16853 e6: i16,
16854 e5: i16,
16855 e4: i16,
16856 e3: i16,
16857 e2: i16,
16858 e1: i16,
16859 e0: i16,
16860) -> __m512i {
16861 unsafe {
16862 let r: Simd = i16x32::new(
16863 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15, x16:e16, x17:e17, x18:e18,
16864 x19:e19, x20:e20, x21:e21, x22:e22, x23:e23, x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31,
16865 );
16866 transmute(src:r)
16867 }
16868}
16869
16870/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16871///
16872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
16873#[inline]
16874#[target_feature(enable = "avx512f")]
16875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16877pub const fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16878 _mm512_set_epi32(e15:d, e14:c, e13:b, e12:a, e11:d, e10:c, e9:b, e8:a, e7:d, e6:c, e5:b, e4:a, e3:d, e2:c, e1:b, e0:a)
16879}
16880
16881/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16882///
16883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16884#[inline]
16885#[target_feature(enable = "avx512f")]
16886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16888pub const fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16889 _mm512_set_ps(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a, e8:d, e9:c, e10:b, e11:a, e12:d, e13:c, e14:b, e15:a)
16890}
16891
16892/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16893///
16894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16895#[inline]
16896#[target_feature(enable = "avx512f")]
16897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16899pub const fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16900 _mm512_set_pd(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a)
16901}
16902
16903/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16904///
16905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16906#[inline]
16907#[target_feature(enable = "avx512f")]
16908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16909#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16910pub const fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16911 _mm512_set_epi32(e15:a, e14:b, e13:c, e12:d, e11:a, e10:b, e9:c, e8:d, e7:a, e6:b, e5:c, e4:d, e3:a, e2:b, e1:c, e0:d)
16912}
16913
16914/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16915///
16916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16917#[inline]
16918#[target_feature(enable = "avx512f")]
16919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16920#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16921pub const fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16922 _mm512_set_ps(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d, e8:a, e9:b, e10:c, e11:d, e12:a, e13:b, e14:c, e15:d)
16923}
16924
16925/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16926///
16927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16928#[inline]
16929#[target_feature(enable = "avx512f")]
16930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16931#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16932pub const fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16933 _mm512_set_pd(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d)
16934}
16935
16936/// Set packed 64-bit integers in dst with the supplied values.
16937///
16938/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
16939#[inline]
16940#[target_feature(enable = "avx512f")]
16941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16942#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16943pub const fn _mm512_set_epi64(
16944 e0: i64,
16945 e1: i64,
16946 e2: i64,
16947 e3: i64,
16948 e4: i64,
16949 e5: i64,
16950 e6: i64,
16951 e7: i64,
16952) -> __m512i {
16953 _mm512_setr_epi64(e0:e7, e1:e6, e2:e5, e3:e4, e4:e3, e5:e2, e6:e1, e7:e0)
16954}
16955
16956/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16957///
16958/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16959#[inline]
16960#[target_feature(enable = "avx512f")]
16961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16962#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16963pub const fn _mm512_setr_epi64(
16964 e0: i64,
16965 e1: i64,
16966 e2: i64,
16967 e3: i64,
16968 e4: i64,
16969 e5: i64,
16970 e6: i64,
16971 e7: i64,
16972) -> __m512i {
16973 unsafe {
16974 let r: Simd = i64x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7);
16975 transmute(src:r)
16976 }
16977}
16978
16979/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16980///
16981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
16982#[inline]
16983#[target_feature(enable = "avx512f")]
16984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16985#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16986#[rustc_legacy_const_generics(2)]
16987pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
16988 offsets: __m256i,
16989 slice: *const f64,
16990) -> __m512d {
16991 static_assert_imm8_scale!(SCALE);
16992 let zero: Simd = f64x8::ZERO;
16993 let neg_one: i8 = -1;
16994 let slice: *const i8 = slice as *const i8;
16995 let offsets: Simd = offsets.as_i32x8();
16996 let r: Simd = vgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE);
16997 transmute(src:r)
16998}
16999
17000/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17001///
17002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
17003#[inline]
17004#[target_feature(enable = "avx512f")]
17005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17006#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17007#[rustc_legacy_const_generics(4)]
17008pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
17009 src: __m512d,
17010 mask: __mmask8,
17011 offsets: __m256i,
17012 slice: *const f64,
17013) -> __m512d {
17014 static_assert_imm8_scale!(SCALE);
17015 let src: Simd = src.as_f64x8();
17016 let slice: *const i8 = slice as *const i8;
17017 let offsets: Simd = offsets.as_i32x8();
17018 let r: Simd = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
17019 transmute(src:r)
17020}
17021
17022/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17023///
17024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
17025#[inline]
17026#[target_feature(enable = "avx512f")]
17027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17028#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17029#[rustc_legacy_const_generics(2)]
17030pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
17031 offsets: __m512i,
17032 slice: *const f64,
17033) -> __m512d {
17034 static_assert_imm8_scale!(SCALE);
17035 let zero: Simd = f64x8::ZERO;
17036 let neg_one: i8 = -1;
17037 let slice: *const i8 = slice as *const i8;
17038 let offsets: Simd = offsets.as_i64x8();
17039 let r: Simd = vgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE);
17040 transmute(src:r)
17041}
17042
17043/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17044///
17045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
17046#[inline]
17047#[target_feature(enable = "avx512f")]
17048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17049#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17050#[rustc_legacy_const_generics(4)]
17051pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
17052 src: __m512d,
17053 mask: __mmask8,
17054 offsets: __m512i,
17055 slice: *const f64,
17056) -> __m512d {
17057 static_assert_imm8_scale!(SCALE);
17058 let src: Simd = src.as_f64x8();
17059 let slice: *const i8 = slice as *const i8;
17060 let offsets: Simd = offsets.as_i64x8();
17061 let r: Simd = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
17062 transmute(src:r)
17063}
17064
17065/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17066///
17067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
17068#[inline]
17069#[target_feature(enable = "avx512f")]
17070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17071#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17072#[rustc_legacy_const_generics(2)]
17073pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
17074 static_assert_imm8_scale!(SCALE);
17075 let zero: Simd = f32x8::ZERO;
17076 let neg_one: i8 = -1;
17077 let slice: *const i8 = slice as *const i8;
17078 let offsets: Simd = offsets.as_i64x8();
17079 let r: Simd = vgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE);
17080 transmute(src:r)
17081}
17082
17083/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17084///
17085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
17086#[inline]
17087#[target_feature(enable = "avx512f")]
17088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17089#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17090#[rustc_legacy_const_generics(4)]
17091pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
17092 src: __m256,
17093 mask: __mmask8,
17094 offsets: __m512i,
17095 slice: *const f32,
17096) -> __m256 {
17097 static_assert_imm8_scale!(SCALE);
17098 let src: Simd = src.as_f32x8();
17099 let slice: *const i8 = slice as *const i8;
17100 let offsets: Simd = offsets.as_i64x8();
17101 let r: Simd = vgatherqps(src, slice, offsets, mask as i8, SCALE);
17102 transmute(src:r)
17103}
17104
17105/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17106///
17107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
17108#[inline]
17109#[target_feature(enable = "avx512f")]
17110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17111#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17112#[rustc_legacy_const_generics(2)]
17113pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
17114 static_assert_imm8_scale!(SCALE);
17115 let zero: Simd = f32x16::ZERO;
17116 let neg_one: i16 = -1;
17117 let slice: *const i8 = slice as *const i8;
17118 let offsets: Simd = offsets.as_i32x16();
17119 let r: Simd = vgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE);
17120 transmute(src:r)
17121}
17122
17123/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
17126#[inline]
17127#[target_feature(enable = "avx512f")]
17128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17129#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17130#[rustc_legacy_const_generics(4)]
17131pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
17132 src: __m512,
17133 mask: __mmask16,
17134 offsets: __m512i,
17135 slice: *const f32,
17136) -> __m512 {
17137 static_assert_imm8_scale!(SCALE);
17138 let src: Simd = src.as_f32x16();
17139 let slice: *const i8 = slice as *const i8;
17140 let offsets: Simd = offsets.as_i32x16();
17141 let r: Simd = vgatherdps(src, slice, offsets, mask as i16, SCALE);
17142 transmute(src:r)
17143}
17144
17145/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17146///
17147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
17148#[inline]
17149#[target_feature(enable = "avx512f")]
17150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17151#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17152#[rustc_legacy_const_generics(2)]
17153pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
17154 offsets: __m512i,
17155 slice: *const i32,
17156) -> __m512i {
17157 static_assert_imm8_scale!(SCALE);
17158 let zero: Simd = i32x16::ZERO;
17159 let neg_one: i16 = -1;
17160 let slice: *const i8 = slice as *const i8;
17161 let offsets: Simd = offsets.as_i32x16();
17162 let r: Simd = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE);
17163 transmute(src:r)
17164}
17165
17166/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17167///
17168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
17169#[inline]
17170#[target_feature(enable = "avx512f")]
17171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17172#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17173#[rustc_legacy_const_generics(4)]
17174pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
17175 src: __m512i,
17176 mask: __mmask16,
17177 offsets: __m512i,
17178 slice: *const i32,
17179) -> __m512i {
17180 static_assert_imm8_scale!(SCALE);
17181 let src: Simd = src.as_i32x16();
17182 let mask: i16 = mask as i16;
17183 let slice: *const i8 = slice as *const i8;
17184 let offsets: Simd = offsets.as_i32x16();
17185 let r: Simd = vpgatherdd(src, slice, offsets, mask, SCALE);
17186 transmute(src:r)
17187}
17188
17189/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17190///
17191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
17192#[inline]
17193#[target_feature(enable = "avx512f")]
17194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17195#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17196#[rustc_legacy_const_generics(2)]
17197pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
17198 offsets: __m256i,
17199 slice: *const i64,
17200) -> __m512i {
17201 static_assert_imm8_scale!(SCALE);
17202 let zero: Simd = i64x8::ZERO;
17203 let neg_one: i8 = -1;
17204 let slice: *const i8 = slice as *const i8;
17205 let offsets: Simd = offsets.as_i32x8();
17206 let r: Simd = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE);
17207 transmute(src:r)
17208}
17209
17210/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17211///
17212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
17213#[inline]
17214#[target_feature(enable = "avx512f")]
17215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17216#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17217#[rustc_legacy_const_generics(4)]
17218pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
17219 src: __m512i,
17220 mask: __mmask8,
17221 offsets: __m256i,
17222 slice: *const i64,
17223) -> __m512i {
17224 static_assert_imm8_scale!(SCALE);
17225 let src: Simd = src.as_i64x8();
17226 let mask: i8 = mask as i8;
17227 let slice: *const i8 = slice as *const i8;
17228 let offsets: Simd = offsets.as_i32x8();
17229 let r: Simd = vpgatherdq(src, slice, offsets, mask, SCALE);
17230 transmute(src:r)
17231}
17232
17233/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17234///
17235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
17236#[inline]
17237#[target_feature(enable = "avx512f")]
17238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17239#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17240#[rustc_legacy_const_generics(2)]
17241pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
17242 offsets: __m512i,
17243 slice: *const i64,
17244) -> __m512i {
17245 static_assert_imm8_scale!(SCALE);
17246 let zero: Simd = i64x8::ZERO;
17247 let neg_one: i8 = -1;
17248 let slice: *const i8 = slice as *const i8;
17249 let offsets: Simd = offsets.as_i64x8();
17250 let r: Simd = vpgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE);
17251 transmute(src:r)
17252}
17253
17254/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17255///
17256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
17257#[inline]
17258#[target_feature(enable = "avx512f")]
17259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17260#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17261#[rustc_legacy_const_generics(4)]
17262pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
17263 src: __m512i,
17264 mask: __mmask8,
17265 offsets: __m512i,
17266 slice: *const i64,
17267) -> __m512i {
17268 static_assert_imm8_scale!(SCALE);
17269 let src: Simd = src.as_i64x8();
17270 let mask: i8 = mask as i8;
17271 let slice: *const i8 = slice as *const i8;
17272 let offsets: Simd = offsets.as_i64x8();
17273 let r: Simd = vpgatherqq(src, slice, offsets, mask, SCALE);
17274 transmute(src:r)
17275}
17276
17277/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17278///
17279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
17280#[inline]
17281#[target_feature(enable = "avx512f")]
17282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17283#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17284#[rustc_legacy_const_generics(2)]
17285pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
17286 offsets: __m512i,
17287 slice: *const i32,
17288) -> __m256i {
17289 static_assert_imm8_scale!(SCALE);
17290 let zeros: Simd = i32x8::ZERO;
17291 let neg_one: i8 = -1;
17292 let slice: *const i8 = slice as *const i8;
17293 let offsets: Simd = offsets.as_i64x8();
17294 let r: Simd = vpgatherqd(src:zeros, slice, offsets, mask:neg_one, SCALE);
17295 transmute(src:r)
17296}
17297
17298/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17299///
17300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
17301#[inline]
17302#[target_feature(enable = "avx512f")]
17303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17304#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17305#[rustc_legacy_const_generics(4)]
17306pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
17307 src: __m256i,
17308 mask: __mmask8,
17309 offsets: __m512i,
17310 slice: *const i32,
17311) -> __m256i {
17312 static_assert_imm8_scale!(SCALE);
17313 let src: Simd = src.as_i32x8();
17314 let mask: i8 = mask as i8;
17315 let slice: *const i8 = slice as *const i8;
17316 let offsets: Simd = offsets.as_i64x8();
17317 let r: Simd = vpgatherqd(src, slice, offsets, mask, SCALE);
17318 transmute(src:r)
17319}
17320
17321/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17322///
17323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
17324#[inline]
17325#[target_feature(enable = "avx512f")]
17326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17327#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17328#[rustc_legacy_const_generics(3)]
17329pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
17330 slice: *mut f64,
17331 offsets: __m256i,
17332 src: __m512d,
17333) {
17334 static_assert_imm8_scale!(SCALE);
17335 let src: Simd = src.as_f64x8();
17336 let neg_one: i8 = -1;
17337 let slice: *mut i8 = slice as *mut i8;
17338 let offsets: Simd = offsets.as_i32x8();
17339 vscatterdpd(slice, mask:neg_one, offsets, src, SCALE);
17340}
17341
17342/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17343///
17344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
17345#[inline]
17346#[target_feature(enable = "avx512f")]
17347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17348#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17349#[rustc_legacy_const_generics(4)]
17350pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
17351 slice: *mut f64,
17352 mask: __mmask8,
17353 offsets: __m256i,
17354 src: __m512d,
17355) {
17356 static_assert_imm8_scale!(SCALE);
17357 let src: Simd = src.as_f64x8();
17358 let slice: *mut i8 = slice as *mut i8;
17359 let offsets: Simd = offsets.as_i32x8();
17360 vscatterdpd(slice, mask as i8, offsets, src, SCALE);
17361}
17362
17363/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17364///
17365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
17366#[inline]
17367#[target_feature(enable = "avx512f")]
17368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17369#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17370#[rustc_legacy_const_generics(3)]
17371pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
17372 slice: *mut f64,
17373 offsets: __m512i,
17374 src: __m512d,
17375) {
17376 static_assert_imm8_scale!(SCALE);
17377 let src: Simd = src.as_f64x8();
17378 let neg_one: i8 = -1;
17379 let slice: *mut i8 = slice as *mut i8;
17380 let offsets: Simd = offsets.as_i64x8();
17381 vscatterqpd(slice, mask:neg_one, offsets, src, SCALE);
17382}
17383
17384/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17385///
17386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
17387#[inline]
17388#[target_feature(enable = "avx512f")]
17389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17390#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17391#[rustc_legacy_const_generics(4)]
17392pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
17393 slice: *mut f64,
17394 mask: __mmask8,
17395 offsets: __m512i,
17396 src: __m512d,
17397) {
17398 static_assert_imm8_scale!(SCALE);
17399 let src: Simd = src.as_f64x8();
17400 let slice: *mut i8 = slice as *mut i8;
17401 let offsets: Simd = offsets.as_i64x8();
17402 vscatterqpd(slice, mask as i8, offsets, src, SCALE);
17403}
17404
17405/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17406///
17407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
17408#[inline]
17409#[target_feature(enable = "avx512f")]
17410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17411#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17412#[rustc_legacy_const_generics(3)]
17413pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17414 slice: *mut f32,
17415 offsets: __m512i,
17416 src: __m512,
17417) {
17418 static_assert_imm8_scale!(SCALE);
17419 let src: Simd = src.as_f32x16();
17420 let neg_one: i16 = -1;
17421 let slice: *mut i8 = slice as *mut i8;
17422 let offsets: Simd = offsets.as_i32x16();
17423 vscatterdps(slice, mask:neg_one, offsets, src, SCALE);
17424}
17425
17426/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17427///
17428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17429#[inline]
17430#[target_feature(enable = "avx512f")]
17431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17432#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17433#[rustc_legacy_const_generics(4)]
17434pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17435 slice: *mut f32,
17436 mask: __mmask16,
17437 offsets: __m512i,
17438 src: __m512,
17439) {
17440 static_assert_imm8_scale!(SCALE);
17441 let src: Simd = src.as_f32x16();
17442 let slice: *mut i8 = slice as *mut i8;
17443 let offsets: Simd = offsets.as_i32x16();
17444 vscatterdps(slice, mask as i16, offsets, src, SCALE);
17445}
17446
17447/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17448///
17449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
17450#[inline]
17451#[target_feature(enable = "avx512f")]
17452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17453#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17454#[rustc_legacy_const_generics(3)]
17455pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
17456 slice: *mut f32,
17457 offsets: __m512i,
17458 src: __m256,
17459) {
17460 static_assert_imm8_scale!(SCALE);
17461 let src: Simd = src.as_f32x8();
17462 let neg_one: i8 = -1;
17463 let slice: *mut i8 = slice as *mut i8;
17464 let offsets: Simd = offsets.as_i64x8();
17465 vscatterqps(slice, mask:neg_one, offsets, src, SCALE);
17466}
17467
17468/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17469///
17470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17471#[inline]
17472#[target_feature(enable = "avx512f")]
17473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17474#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17475#[rustc_legacy_const_generics(4)]
17476pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17477 slice: *mut f32,
17478 mask: __mmask8,
17479 offsets: __m512i,
17480 src: __m256,
17481) {
17482 static_assert_imm8_scale!(SCALE);
17483 let src: Simd = src.as_f32x8();
17484 let slice: *mut i8 = slice as *mut i8;
17485 let offsets: Simd = offsets.as_i64x8();
17486 vscatterqps(slice, mask as i8, offsets, src, SCALE);
17487}
17488
17489/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17490///
17491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17492#[inline]
17493#[target_feature(enable = "avx512f")]
17494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17495#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17496#[rustc_legacy_const_generics(3)]
17497pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17498 slice: *mut i64,
17499 offsets: __m256i,
17500 src: __m512i,
17501) {
17502 static_assert_imm8_scale!(SCALE);
17503 let src: Simd = src.as_i64x8();
17504 let neg_one: i8 = -1;
17505 let slice: *mut i8 = slice as *mut i8;
17506 let offsets: Simd = offsets.as_i32x8();
17507 vpscatterdq(slice, mask:neg_one, offsets, src, SCALE);
17508}
17509
17510/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17511///
17512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17513#[inline]
17514#[target_feature(enable = "avx512f")]
17515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17516#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17517#[rustc_legacy_const_generics(4)]
17518pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17519 slice: *mut i64,
17520 mask: __mmask8,
17521 offsets: __m256i,
17522 src: __m512i,
17523) {
17524 static_assert_imm8_scale!(SCALE);
17525 let src: Simd = src.as_i64x8();
17526 let mask: i8 = mask as i8;
17527 let slice: *mut i8 = slice as *mut i8;
17528 let offsets: Simd = offsets.as_i32x8();
17529 vpscatterdq(slice, mask, offsets, src, SCALE);
17530}
17531
17532/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17533///
17534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17535#[inline]
17536#[target_feature(enable = "avx512f")]
17537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17538#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17539#[rustc_legacy_const_generics(3)]
17540pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17541 slice: *mut i64,
17542 offsets: __m512i,
17543 src: __m512i,
17544) {
17545 static_assert_imm8_scale!(SCALE);
17546 let src: Simd = src.as_i64x8();
17547 let neg_one: i8 = -1;
17548 let slice: *mut i8 = slice as *mut i8;
17549 let offsets: Simd = offsets.as_i64x8();
17550 vpscatterqq(slice, mask:neg_one, offsets, src, SCALE);
17551}
17552
17553/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17554///
17555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17556#[inline]
17557#[target_feature(enable = "avx512f")]
17558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17559#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17560#[rustc_legacy_const_generics(4)]
17561pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17562 slice: *mut i64,
17563 mask: __mmask8,
17564 offsets: __m512i,
17565 src: __m512i,
17566) {
17567 static_assert_imm8_scale!(SCALE);
17568 let src: Simd = src.as_i64x8();
17569 let mask: i8 = mask as i8;
17570 let slice: *mut i8 = slice as *mut i8;
17571 let offsets: Simd = offsets.as_i64x8();
17572 vpscatterqq(slice, mask, offsets, src, SCALE);
17573}
17574
17575/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17576///
17577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17578#[inline]
17579#[target_feature(enable = "avx512f")]
17580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17581#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17582#[rustc_legacy_const_generics(3)]
17583pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17584 slice: *mut i32,
17585 offsets: __m512i,
17586 src: __m512i,
17587) {
17588 static_assert_imm8_scale!(SCALE);
17589 let src: Simd = src.as_i32x16();
17590 let neg_one: i16 = -1;
17591 let slice: *mut i8 = slice as *mut i8;
17592 let offsets: Simd = offsets.as_i32x16();
17593 vpscatterdd(slice, mask:neg_one, offsets, src, SCALE);
17594}
17595
17596/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17597///
17598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17599#[inline]
17600#[target_feature(enable = "avx512f")]
17601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17602#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17603#[rustc_legacy_const_generics(4)]
17604pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17605 slice: *mut i32,
17606 mask: __mmask16,
17607 offsets: __m512i,
17608 src: __m512i,
17609) {
17610 static_assert_imm8_scale!(SCALE);
17611 let src: Simd = src.as_i32x16();
17612 let mask: i16 = mask as i16;
17613 let slice: *mut i8 = slice as *mut i8;
17614 let offsets: Simd = offsets.as_i32x16();
17615 vpscatterdd(slice, mask, offsets, src, SCALE);
17616}
17617
17618/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17619///
17620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17621#[inline]
17622#[target_feature(enable = "avx512f")]
17623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17624#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17625#[rustc_legacy_const_generics(3)]
17626pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17627 slice: *mut i32,
17628 offsets: __m512i,
17629 src: __m256i,
17630) {
17631 static_assert_imm8_scale!(SCALE);
17632 let src: Simd = src.as_i32x8();
17633 let neg_one: i8 = -1;
17634 let slice: *mut i8 = slice as *mut i8;
17635 let offsets: Simd = offsets.as_i64x8();
17636 vpscatterqd(slice, mask:neg_one, offsets, src, SCALE);
17637}
17638
17639/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17640///
17641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17642#[inline]
17643#[target_feature(enable = "avx512f")]
17644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17645#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17646#[rustc_legacy_const_generics(4)]
17647pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17648 slice: *mut i32,
17649 mask: __mmask8,
17650 offsets: __m512i,
17651 src: __m256i,
17652) {
17653 static_assert_imm8_scale!(SCALE);
17654 let src: Simd = src.as_i32x8();
17655 let mask: i8 = mask as i8;
17656 let slice: *mut i8 = slice as *mut i8;
17657 let offsets: Simd = offsets.as_i64x8();
17658 vpscatterqd(slice, mask, offsets, src, SCALE);
17659}
17660
17661/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17662/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17663///
17664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
17665#[inline]
17666#[target_feature(enable = "avx512f")]
17667#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17668#[rustc_legacy_const_generics(2)]
17669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17670pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17671 vindex: __m512i,
17672 base_addr: *const i64,
17673) -> __m512i {
17674 _mm512_i32gather_epi64::<SCALE>(offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17675}
17676
17677/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17678/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17679/// (elements are copied from src when the corresponding mask bit is not set).
17680///
17681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17682#[inline]
17683#[target_feature(enable = "avx512f")]
17684#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17685#[rustc_legacy_const_generics(4)]
17686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17687pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17688 src: __m512i,
17689 k: __mmask8,
17690 vindex: __m512i,
17691 base_addr: *const i64,
17692) -> __m512i {
17693 _mm512_mask_i32gather_epi64::<SCALE>(src, mask:k, offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17694}
17695
17696/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17697/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17698///
17699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17700#[inline]
17701#[target_feature(enable = "avx512f")]
17702#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17703#[rustc_legacy_const_generics(2)]
17704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17705pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17706 vindex: __m512i,
17707 base_addr: *const f64,
17708) -> __m512d {
17709 _mm512_i32gather_pd::<SCALE>(offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17710}
17711
17712/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17713/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17714/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17715///
17716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17717#[inline]
17718#[target_feature(enable = "avx512f")]
17719#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17720#[rustc_legacy_const_generics(4)]
17721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17722pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17723 src: __m512d,
17724 k: __mmask8,
17725 vindex: __m512i,
17726 base_addr: *const f64,
17727) -> __m512d {
17728 _mm512_mask_i32gather_pd::<SCALE>(src, mask:k, offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17729}
17730
17731/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17732/// indices stored in the lower half of vindex scaled by scale.
17733///
17734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17735#[inline]
17736#[target_feature(enable = "avx512f")]
17737#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17738#[rustc_legacy_const_generics(3)]
17739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17740pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17741 base_addr: *mut i64,
17742 vindex: __m512i,
17743 a: __m512i,
17744) {
17745 _mm512_i32scatter_epi64::<SCALE>(slice:base_addr, offsets:_mm512_castsi512_si256(vindex), src:a)
17746}
17747
17748/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17749/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17750/// mask bit is not set are not written to memory).
17751///
17752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17753#[inline]
17754#[target_feature(enable = "avx512f")]
17755#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17756#[rustc_legacy_const_generics(4)]
17757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17758pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17759 base_addr: *mut i64,
17760 k: __mmask8,
17761 vindex: __m512i,
17762 a: __m512i,
17763) {
17764 _mm512_mask_i32scatter_epi64::<SCALE>(slice:base_addr, mask:k, offsets:_mm512_castsi512_si256(vindex), src:a)
17765}
17766
17767/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17768/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17769///
17770/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17771#[inline]
17772#[target_feature(enable = "avx512f")]
17773#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17774#[rustc_legacy_const_generics(3)]
17775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17776pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17777 base_addr: *mut f64,
17778 vindex: __m512i,
17779 a: __m512d,
17780) {
17781 _mm512_i32scatter_pd::<SCALE>(slice:base_addr, offsets:_mm512_castsi512_si256(vindex), src:a)
17782}
17783
17784/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17785/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17786/// (elements whose corresponding mask bit is not set are not written to memory).
17787///
17788/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17789#[inline]
17790#[target_feature(enable = "avx512f")]
17791#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17792#[rustc_legacy_const_generics(4)]
17793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17794pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17795 base_addr: *mut f64,
17796 k: __mmask8,
17797 vindex: __m512i,
17798 a: __m512d,
17799) {
17800 _mm512_mask_i32scatter_pd::<SCALE>(slice:base_addr, mask:k, offsets:_mm512_castsi512_si256(vindex), src:a)
17801}
17802
17803/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17804/// indices stored in vindex scaled by scale
17805///
17806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17807#[inline]
17808#[target_feature(enable = "avx512f,avx512vl")]
17809#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17810#[rustc_legacy_const_generics(3)]
17811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17812pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17813 base_addr: *mut i32,
17814 vindex: __m256i,
17815 a: __m256i,
17816) {
17817 static_assert_imm8_scale!(SCALE);
17818 vpscatterdd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x8(), src:a.as_i32x8(), SCALE)
17819}
17820
17821/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17822/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17823/// are not written to memory).
17824///
17825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17826#[inline]
17827#[target_feature(enable = "avx512f,avx512vl")]
17828#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17829#[rustc_legacy_const_generics(4)]
17830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17831pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17832 base_addr: *mut i32,
17833 k: __mmask8,
17834 vindex: __m256i,
17835 a: __m256i,
17836) {
17837 static_assert_imm8_scale!(SCALE);
17838 vpscatterdd_256(slice:base_addr as _, k, offsets:vindex.as_i32x8(), src:a.as_i32x8(), SCALE)
17839}
17840
17841/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17842///
17843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17844#[inline]
17845#[target_feature(enable = "avx512f,avx512vl")]
17846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17847#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17848#[rustc_legacy_const_generics(3)]
17849pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17850 slice: *mut i64,
17851 offsets: __m128i,
17852 src: __m256i,
17853) {
17854 static_assert_imm8_scale!(SCALE);
17855 let src: Simd = src.as_i64x4();
17856 let slice: *mut i8 = slice as *mut i8;
17857 let offsets: Simd = offsets.as_i32x4();
17858 vpscatterdq_256(slice, k:0xff, offsets, src, SCALE);
17859}
17860
17861/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17862/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17863/// are not written to memory).
17864///
17865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17866#[inline]
17867#[target_feature(enable = "avx512f,avx512vl")]
17868#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17869#[rustc_legacy_const_generics(4)]
17870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17871pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17872 base_addr: *mut i64,
17873 k: __mmask8,
17874 vindex: __m128i,
17875 a: __m256i,
17876) {
17877 static_assert_imm8_scale!(SCALE);
17878 vpscatterdq_256(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_i64x4(), SCALE)
17879}
17880
17881/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17882/// at packed 32-bit integer indices stored in vindex scaled by scale
17883///
17884/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17885#[inline]
17886#[target_feature(enable = "avx512f,avx512vl")]
17887#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17888#[rustc_legacy_const_generics(3)]
17889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17890pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17891 base_addr: *mut f64,
17892 vindex: __m128i,
17893 a: __m256d,
17894) {
17895 static_assert_imm8_scale!(SCALE);
17896 vscatterdpd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_f64x4(), SCALE)
17897}
17898
17899/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17900/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17901/// mask bit is not set are not written to memory).
17902///
17903/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17904#[inline]
17905#[target_feature(enable = "avx512f,avx512vl")]
17906#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17907#[rustc_legacy_const_generics(4)]
17908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17909pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17910 base_addr: *mut f64,
17911 k: __mmask8,
17912 vindex: __m128i,
17913 a: __m256d,
17914) {
17915 static_assert_imm8_scale!(SCALE);
17916 vscatterdpd_256(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_f64x4(), SCALE)
17917}
17918
17919/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17920/// at packed 32-bit integer indices stored in vindex scaled by scale
17921///
17922/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17923#[inline]
17924#[target_feature(enable = "avx512f,avx512vl")]
17925#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17926#[rustc_legacy_const_generics(3)]
17927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17928pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17929 base_addr: *mut f32,
17930 vindex: __m256i,
17931 a: __m256,
17932) {
17933 static_assert_imm8_scale!(SCALE);
17934 vscatterdps_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x8(), src:a.as_f32x8(), SCALE)
17935}
17936
17937/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17938/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17939/// mask bit is not set are not written to memory).
17940///
17941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17942#[inline]
17943#[target_feature(enable = "avx512f,avx512vl")]
17944#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17945#[rustc_legacy_const_generics(4)]
17946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17947pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17948 base_addr: *mut f32,
17949 k: __mmask8,
17950 vindex: __m256i,
17951 a: __m256,
17952) {
17953 static_assert_imm8_scale!(SCALE);
17954 vscatterdps_256(slice:base_addr as _, k, offsets:vindex.as_i32x8(), src:a.as_f32x8(), SCALE)
17955}
17956
17957/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17958/// indices stored in vindex scaled by scale
17959///
17960/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17961#[inline]
17962#[target_feature(enable = "avx512f,avx512vl")]
17963#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17964#[rustc_legacy_const_generics(3)]
17965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17966pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17967 base_addr: *mut i32,
17968 vindex: __m256i,
17969 a: __m128i,
17970) {
17971 static_assert_imm8_scale!(SCALE);
17972 vpscatterqd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_i32x4(), SCALE)
17973}
17974
17975/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17976/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17977/// are not written to memory).
17978///
17979/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17980#[inline]
17981#[target_feature(enable = "avx512f,avx512vl")]
17982#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17983#[rustc_legacy_const_generics(4)]
17984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17985pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17986 base_addr: *mut i32,
17987 k: __mmask8,
17988 vindex: __m256i,
17989 a: __m128i,
17990) {
17991 static_assert_imm8_scale!(SCALE);
17992 vpscatterqd_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_i32x4(), SCALE)
17993}
17994
17995/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17996/// indices stored in vindex scaled by scale
17997///
17998/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17999#[inline]
18000#[target_feature(enable = "avx512f,avx512vl")]
18001#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18002#[rustc_legacy_const_generics(3)]
18003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18004pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
18005 base_addr: *mut i64,
18006 vindex: __m256i,
18007 a: __m256i,
18008) {
18009 static_assert_imm8_scale!(SCALE);
18010 vpscatterqq_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_i64x4(), SCALE)
18011}
18012
18013/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18014/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18015/// are not written to memory).
18016///
18017/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
18018#[inline]
18019#[target_feature(enable = "avx512f,avx512vl")]
18020#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18021#[rustc_legacy_const_generics(4)]
18022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18023pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
18024 base_addr: *mut i64,
18025 k: __mmask8,
18026 vindex: __m256i,
18027 a: __m256i,
18028) {
18029 static_assert_imm8_scale!(SCALE);
18030 vpscatterqq_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_i64x4(), SCALE)
18031}
18032
18033/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18034/// at packed 64-bit integer indices stored in vindex scaled by scale
18035///
18036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
18037#[inline]
18038#[target_feature(enable = "avx512f,avx512vl")]
18039#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18040#[rustc_legacy_const_generics(3)]
18041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18042pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
18043 base_addr: *mut f64,
18044 vindex: __m256i,
18045 a: __m256d,
18046) {
18047 static_assert_imm8_scale!(SCALE);
18048 vscatterqpd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_f64x4(), SCALE)
18049}
18050
18051/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18052/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18053/// mask bit is not set are not written to memory).
18054///
18055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
18056#[inline]
18057#[target_feature(enable = "avx512f,avx512vl")]
18058#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18059#[rustc_legacy_const_generics(4)]
18060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18061pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
18062 base_addr: *mut f64,
18063 k: __mmask8,
18064 vindex: __m256i,
18065 a: __m256d,
18066) {
18067 static_assert_imm8_scale!(SCALE);
18068 vscatterqpd_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_f64x4(), SCALE)
18069}
18070
18071/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18072/// at packed 64-bit integer indices stored in vindex scaled by scale
18073///
18074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
18075#[inline]
18076#[target_feature(enable = "avx512f,avx512vl")]
18077#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18078#[rustc_legacy_const_generics(3)]
18079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18080pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
18081 base_addr: *mut f32,
18082 vindex: __m256i,
18083 a: __m128,
18084) {
18085 static_assert_imm8_scale!(SCALE);
18086 vscatterqps_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_f32x4(), SCALE)
18087}
18088
18089/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18090/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18091/// mask bit is not set are not written to memory).
18092///
18093/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
18094#[inline]
18095#[target_feature(enable = "avx512f,avx512vl")]
18096#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18097#[rustc_legacy_const_generics(4)]
18098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18099pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
18100 base_addr: *mut f32,
18101 k: __mmask8,
18102 vindex: __m256i,
18103 a: __m128,
18104) {
18105 static_assert_imm8_scale!(SCALE);
18106 vscatterqps_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_f32x4(), SCALE)
18107}
18108
18109/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18110/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18111/// mask bit is not set).
18112///
18113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
18114#[inline]
18115#[target_feature(enable = "avx512f,avx512vl")]
18116#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18117#[rustc_legacy_const_generics(4)]
18118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18119pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
18120 src: __m256i,
18121 k: __mmask8,
18122 vindex: __m256i,
18123 base_addr: *const i32,
18124) -> __m256i {
18125 static_assert_imm8_scale!(SCALE);
18126 transmute(src:vpgatherdd_256(
18127 src.as_i32x8(),
18128 slice:base_addr as _,
18129 offsets:vindex.as_i32x8(),
18130 k,
18131 SCALE,
18132 ))
18133}
18134
18135/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18136/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18137/// mask bit is not set).
18138///
18139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
18140#[inline]
18141#[target_feature(enable = "avx512f,avx512vl")]
18142#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18143#[rustc_legacy_const_generics(4)]
18144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18145pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
18146 src: __m256i,
18147 k: __mmask8,
18148 vindex: __m128i,
18149 base_addr: *const i64,
18150) -> __m256i {
18151 static_assert_imm8_scale!(SCALE);
18152 transmute(src:vpgatherdq_256(
18153 src.as_i64x4(),
18154 slice:base_addr as _,
18155 offsets:vindex.as_i32x4(),
18156 k,
18157 SCALE,
18158 ))
18159}
18160
18161/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18162/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18163/// from src when the corresponding mask bit is not set).
18164///
18165/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
18166#[inline]
18167#[target_feature(enable = "avx512f,avx512vl")]
18168#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18169#[rustc_legacy_const_generics(4)]
18170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18171pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
18172 src: __m256d,
18173 k: __mmask8,
18174 vindex: __m128i,
18175 base_addr: *const f64,
18176) -> __m256d {
18177 static_assert_imm8_scale!(SCALE);
18178 transmute(src:vgatherdpd_256(
18179 src.as_f64x4(),
18180 slice:base_addr as _,
18181 offsets:vindex.as_i32x4(),
18182 k,
18183 SCALE,
18184 ))
18185}
18186
18187/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18188/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18189/// from src when the corresponding mask bit is not set).
18190///
18191/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
18192#[inline]
18193#[target_feature(enable = "avx512f,avx512vl")]
18194#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18195#[rustc_legacy_const_generics(4)]
18196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18197pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
18198 src: __m256,
18199 k: __mmask8,
18200 vindex: __m256i,
18201 base_addr: *const f32,
18202) -> __m256 {
18203 static_assert_imm8_scale!(SCALE);
18204 transmute(src:vgatherdps_256(
18205 src.as_f32x8(),
18206 slice:base_addr as _,
18207 offsets:vindex.as_i32x8(),
18208 k,
18209 SCALE,
18210 ))
18211}
18212
18213/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18214/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18215/// mask bit is not set).
18216///
18217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
18218#[inline]
18219#[target_feature(enable = "avx512f,avx512vl")]
18220#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18221#[rustc_legacy_const_generics(4)]
18222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18223pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
18224 src: __m128i,
18225 k: __mmask8,
18226 vindex: __m256i,
18227 base_addr: *const i32,
18228) -> __m128i {
18229 static_assert_imm8_scale!(SCALE);
18230 transmute(src:vpgatherqd_256(
18231 src.as_i32x4(),
18232 slice:base_addr as _,
18233 offsets:vindex.as_i64x4(),
18234 k,
18235 SCALE,
18236 ))
18237}
18238
18239/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18240/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18241/// mask bit is not set).
18242///
18243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
18244#[inline]
18245#[target_feature(enable = "avx512f,avx512vl")]
18246#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18247#[rustc_legacy_const_generics(4)]
18248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18249pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
18250 src: __m256i,
18251 k: __mmask8,
18252 vindex: __m256i,
18253 base_addr: *const i64,
18254) -> __m256i {
18255 static_assert_imm8_scale!(SCALE);
18256 transmute(src:vpgatherqq_256(
18257 src.as_i64x4(),
18258 slice:base_addr as _,
18259 offsets:vindex.as_i64x4(),
18260 k,
18261 SCALE,
18262 ))
18263}
18264
18265/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18266/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18267/// from src when the corresponding mask bit is not set).
18268///
18269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
18270#[inline]
18271#[target_feature(enable = "avx512f,avx512vl")]
18272#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18273#[rustc_legacy_const_generics(4)]
18274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18275pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
18276 src: __m256d,
18277 k: __mmask8,
18278 vindex: __m256i,
18279 base_addr: *const f64,
18280) -> __m256d {
18281 static_assert_imm8_scale!(SCALE);
18282 transmute(src:vgatherqpd_256(
18283 src.as_f64x4(),
18284 slice:base_addr as _,
18285 offsets:vindex.as_i64x4(),
18286 k,
18287 SCALE,
18288 ))
18289}
18290
18291/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18292/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18293/// from src when the corresponding mask bit is not set).
18294///
18295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
18296#[inline]
18297#[target_feature(enable = "avx512f,avx512vl")]
18298#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18299#[rustc_legacy_const_generics(4)]
18300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18301pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
18302 src: __m128,
18303 k: __mmask8,
18304 vindex: __m256i,
18305 base_addr: *const f32,
18306) -> __m128 {
18307 static_assert_imm8_scale!(SCALE);
18308 transmute(src:vgatherqps_256(
18309 src.as_f32x4(),
18310 slice:base_addr as _,
18311 offsets:vindex.as_i64x4(),
18312 k,
18313 SCALE,
18314 ))
18315}
18316
18317/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18318/// indices stored in vindex scaled by scale
18319///
18320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
18321#[inline]
18322#[target_feature(enable = "avx512f,avx512vl")]
18323#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18324#[rustc_legacy_const_generics(3)]
18325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18326pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
18327 base_addr: *mut i32,
18328 vindex: __m128i,
18329 a: __m128i,
18330) {
18331 static_assert_imm8_scale!(SCALE);
18332 vpscatterdd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_i32x4(), SCALE)
18333}
18334
18335/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18336/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18337/// are not written to memory).
18338///
18339/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
18340#[inline]
18341#[target_feature(enable = "avx512f,avx512vl")]
18342#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18343#[rustc_legacy_const_generics(4)]
18344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18345pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
18346 base_addr: *mut i32,
18347 k: __mmask8,
18348 vindex: __m128i,
18349 a: __m128i,
18350) {
18351 static_assert_imm8_scale!(SCALE);
18352 vpscatterdd_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_i32x4(), SCALE)
18353}
18354
18355/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18356/// indices stored in vindex scaled by scale
18357///
18358/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
18359#[inline]
18360#[target_feature(enable = "avx512f,avx512vl")]
18361#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18362#[rustc_legacy_const_generics(3)]
18363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18364pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
18365 base_addr: *mut i64,
18366 vindex: __m128i,
18367 a: __m128i,
18368) {
18369 static_assert_imm8_scale!(SCALE);
18370 vpscatterdq_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_i64x2(), SCALE)
18371}
18372
18373/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18374/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18375/// are not written to memory).
18376///
18377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
18378#[inline]
18379#[target_feature(enable = "avx512f,avx512vl")]
18380#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18381#[rustc_legacy_const_generics(4)]
18382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18383pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
18384 base_addr: *mut i64,
18385 k: __mmask8,
18386 vindex: __m128i,
18387 a: __m128i,
18388) {
18389 static_assert_imm8_scale!(SCALE);
18390 vpscatterdq_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_i64x2(), SCALE)
18391}
18392
18393/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18394/// at packed 32-bit integer indices stored in vindex scaled by scale
18395///
18396/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
18397#[inline]
18398#[target_feature(enable = "avx512f,avx512vl")]
18399#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18400#[rustc_legacy_const_generics(3)]
18401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18402pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
18403 base_addr: *mut f64,
18404 vindex: __m128i,
18405 a: __m128d,
18406) {
18407 static_assert_imm8_scale!(SCALE);
18408 vscatterdpd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_f64x2(), SCALE)
18409}
18410
18411/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18412/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18413/// mask bit is not set are not written to memory).
18414///
18415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18416#[inline]
18417#[target_feature(enable = "avx512f,avx512vl")]
18418#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18419#[rustc_legacy_const_generics(4)]
18420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18421pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18422 base_addr: *mut f64,
18423 k: __mmask8,
18424 vindex: __m128i,
18425 a: __m128d,
18426) {
18427 static_assert_imm8_scale!(SCALE);
18428 vscatterdpd_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_f64x2(), SCALE)
18429}
18430
18431/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18432/// at packed 32-bit integer indices stored in vindex scaled by scale
18433///
18434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18435#[inline]
18436#[target_feature(enable = "avx512f,avx512vl")]
18437#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18438#[rustc_legacy_const_generics(3)]
18439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18440pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18441 static_assert_imm8_scale!(SCALE);
18442 vscatterdps_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_f32x4(), SCALE)
18443}
18444
18445/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18446/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18447/// mask bit is not set are not written to memory).
18448///
18449/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18453#[rustc_legacy_const_generics(4)]
18454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18455pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18456 base_addr: *mut f32,
18457 k: __mmask8,
18458 vindex: __m128i,
18459 a: __m128,
18460) {
18461 static_assert_imm8_scale!(SCALE);
18462 vscatterdps_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_f32x4(), SCALE)
18463}
18464
18465/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18466/// indices stored in vindex scaled by scale
18467///
18468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18472#[rustc_legacy_const_generics(3)]
18473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18474pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18475 base_addr: *mut i32,
18476 vindex: __m128i,
18477 a: __m128i,
18478) {
18479 static_assert_imm8_scale!(SCALE);
18480 vpscatterqd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_i32x4(), SCALE)
18481}
18482
18483/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18484/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18485/// are not written to memory).
18486///
18487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18488#[inline]
18489#[target_feature(enable = "avx512f,avx512vl")]
18490#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18491#[rustc_legacy_const_generics(4)]
18492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18493pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18494 base_addr: *mut i32,
18495 k: __mmask8,
18496 vindex: __m128i,
18497 a: __m128i,
18498) {
18499 static_assert_imm8_scale!(SCALE);
18500 vpscatterqd_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_i32x4(), SCALE)
18501}
18502
18503/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18504/// indices stored in vindex scaled by scale
18505///
18506/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18507#[inline]
18508#[target_feature(enable = "avx512f,avx512vl")]
18509#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18510#[rustc_legacy_const_generics(3)]
18511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18512pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18513 base_addr: *mut i64,
18514 vindex: __m128i,
18515 a: __m128i,
18516) {
18517 static_assert_imm8_scale!(SCALE);
18518 vpscatterqq_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_i64x2(), SCALE)
18519}
18520
18521/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18522/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18523/// are not written to memory).
18524///
18525/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18526#[inline]
18527#[target_feature(enable = "avx512f,avx512vl")]
18528#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18529#[rustc_legacy_const_generics(4)]
18530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18531pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18532 base_addr: *mut i64,
18533 k: __mmask8,
18534 vindex: __m128i,
18535 a: __m128i,
18536) {
18537 static_assert_imm8_scale!(SCALE);
18538 vpscatterqq_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_i64x2(), SCALE)
18539}
18540
18541/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18542/// at packed 64-bit integer indices stored in vindex scaled by scale
18543///
18544/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18545#[inline]
18546#[target_feature(enable = "avx512f,avx512vl")]
18547#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18548#[rustc_legacy_const_generics(3)]
18549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18550pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18551 base_addr: *mut f64,
18552 vindex: __m128i,
18553 a: __m128d,
18554) {
18555 static_assert_imm8_scale!(SCALE);
18556 vscatterqpd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_f64x2(), SCALE)
18557}
18558
18559/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18560/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18561/// mask bit is not set are not written to memory).
18562///
18563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18564#[inline]
18565#[target_feature(enable = "avx512f,avx512vl")]
18566#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18567#[rustc_legacy_const_generics(4)]
18568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18569pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18570 base_addr: *mut f64,
18571 k: __mmask8,
18572 vindex: __m128i,
18573 a: __m128d,
18574) {
18575 static_assert_imm8_scale!(SCALE);
18576 vscatterqpd_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_f64x2(), SCALE)
18577}
18578
18579/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18580/// at packed 64-bit integer indices stored in vindex scaled by scale
18581///
18582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18583#[inline]
18584#[target_feature(enable = "avx512f,avx512vl")]
18585#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18586#[rustc_legacy_const_generics(3)]
18587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18588pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18589 static_assert_imm8_scale!(SCALE);
18590 vscatterqps_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_f32x4(), SCALE)
18591}
18592
18593/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18594/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18595///
18596/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18597#[inline]
18598#[target_feature(enable = "avx512f,avx512vl")]
18599#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18600#[rustc_legacy_const_generics(4)]
18601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18602pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18603 base_addr: *mut f32,
18604 k: __mmask8,
18605 vindex: __m128i,
18606 a: __m128,
18607) {
18608 static_assert_imm8_scale!(SCALE);
18609 vscatterqps_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_f32x4(), SCALE)
18610}
18611
18612/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18613/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18614/// mask bit is not set).
18615///
18616/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18617#[inline]
18618#[target_feature(enable = "avx512f,avx512vl")]
18619#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18620#[rustc_legacy_const_generics(4)]
18621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18622pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18623 src: __m128i,
18624 k: __mmask8,
18625 vindex: __m128i,
18626 base_addr: *const i32,
18627) -> __m128i {
18628 static_assert_imm8_scale!(SCALE);
18629 transmute(src:vpgatherdd_128(
18630 src.as_i32x4(),
18631 slice:base_addr as _,
18632 offsets:vindex.as_i32x4(),
18633 k,
18634 SCALE,
18635 ))
18636}
18637
18638/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18639/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18640/// mask bit is not set).
18641///
18642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18643#[inline]
18644#[target_feature(enable = "avx512f,avx512vl")]
18645#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18646#[rustc_legacy_const_generics(4)]
18647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18648pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18649 src: __m128i,
18650 k: __mmask8,
18651 vindex: __m128i,
18652 base_addr: *const i64,
18653) -> __m128i {
18654 static_assert_imm8_scale!(SCALE);
18655 transmute(src:vpgatherdq_128(
18656 src.as_i64x2(),
18657 slice:base_addr as _,
18658 offsets:vindex.as_i32x4(),
18659 k,
18660 SCALE,
18661 ))
18662}
18663
18664/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18665/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18666/// from src when the corresponding mask bit is not set).
18667///
18668/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18669#[inline]
18670#[target_feature(enable = "avx512f,avx512vl")]
18671#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18672#[rustc_legacy_const_generics(4)]
18673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18674pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18675 src: __m128d,
18676 k: __mmask8,
18677 vindex: __m128i,
18678 base_addr: *const f64,
18679) -> __m128d {
18680 static_assert_imm8_scale!(SCALE);
18681 transmute(src:vgatherdpd_128(
18682 src.as_f64x2(),
18683 slice:base_addr as _,
18684 offsets:vindex.as_i32x4(),
18685 k,
18686 SCALE,
18687 ))
18688}
18689
18690/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18691/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18692/// from src when the corresponding mask bit is not set).
18693///
18694/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18695#[inline]
18696#[target_feature(enable = "avx512f,avx512vl")]
18697#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18698#[rustc_legacy_const_generics(4)]
18699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18700pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18701 src: __m128,
18702 k: __mmask8,
18703 vindex: __m128i,
18704 base_addr: *const f32,
18705) -> __m128 {
18706 static_assert_imm8_scale!(SCALE);
18707 transmute(src:vgatherdps_128(
18708 src.as_f32x4(),
18709 slice:base_addr as _,
18710 offsets:vindex.as_i32x4(),
18711 k,
18712 SCALE,
18713 ))
18714}
18715
18716/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18717/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18718/// mask bit is not set).
18719///
18720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18721#[inline]
18722#[target_feature(enable = "avx512f,avx512vl")]
18723#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18724#[rustc_legacy_const_generics(4)]
18725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18726pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18727 src: __m128i,
18728 k: __mmask8,
18729 vindex: __m128i,
18730 base_addr: *const i32,
18731) -> __m128i {
18732 static_assert_imm8_scale!(SCALE);
18733 transmute(src:vpgatherqd_128(
18734 src.as_i32x4(),
18735 slice:base_addr as _,
18736 offsets:vindex.as_i64x2(),
18737 k,
18738 SCALE,
18739 ))
18740}
18741
18742/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18743/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18744/// mask bit is not set).
18745///
18746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18747#[inline]
18748#[target_feature(enable = "avx512f,avx512vl")]
18749#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18750#[rustc_legacy_const_generics(4)]
18751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18752pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18753 src: __m128i,
18754 k: __mmask8,
18755 vindex: __m128i,
18756 base_addr: *const i64,
18757) -> __m128i {
18758 static_assert_imm8_scale!(SCALE);
18759 transmute(src:vpgatherqq_128(
18760 src.as_i64x2(),
18761 slice:base_addr as _,
18762 offsets:vindex.as_i64x2(),
18763 k,
18764 SCALE,
18765 ))
18766}
18767
18768/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18769/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18770/// from src when the corresponding mask bit is not set).
18771///
18772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18773#[inline]
18774#[target_feature(enable = "avx512f,avx512vl")]
18775#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18776#[rustc_legacy_const_generics(4)]
18777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18778pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18779 src: __m128d,
18780 k: __mmask8,
18781 vindex: __m128i,
18782 base_addr: *const f64,
18783) -> __m128d {
18784 static_assert_imm8_scale!(SCALE);
18785 transmute(src:vgatherqpd_128(
18786 src.as_f64x2(),
18787 slice:base_addr as _,
18788 offsets:vindex.as_i64x2(),
18789 k,
18790 SCALE,
18791 ))
18792}
18793
18794/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18795/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18796/// from src when the corresponding mask bit is not set).
18797///
18798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18802#[rustc_legacy_const_generics(4)]
18803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18804pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18805 src: __m128,
18806 k: __mmask8,
18807 vindex: __m128i,
18808 base_addr: *const f32,
18809) -> __m128 {
18810 static_assert_imm8_scale!(SCALE);
18811 transmute(src:vgatherqps_128(
18812 src.as_f32x4(),
18813 slice:base_addr as _,
18814 offsets:vindex.as_i64x2(),
18815 k,
18816 SCALE,
18817 ))
18818}
18819
18820/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18821///
18822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
18823#[inline]
18824#[target_feature(enable = "avx512f")]
18825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18826#[cfg_attr(test, assert_instr(vpcompressd))]
18827pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18828 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src.as_i32x16(), mask:k)) }
18829}
18830
18831/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18832///
18833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18834#[inline]
18835#[target_feature(enable = "avx512f")]
18836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18837#[cfg_attr(test, assert_instr(vpcompressd))]
18838pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18839 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src:i32x16::ZERO, mask:k)) }
18840}
18841
18842/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18843///
18844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18845#[inline]
18846#[target_feature(enable = "avx512f,avx512vl")]
18847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18848#[cfg_attr(test, assert_instr(vpcompressd))]
18849pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18850 unsafe { transmute(src:vpcompressd256(a.as_i32x8(), src.as_i32x8(), mask:k)) }
18851}
18852
18853/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18854///
18855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18856#[inline]
18857#[target_feature(enable = "avx512f,avx512vl")]
18858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18859#[cfg_attr(test, assert_instr(vpcompressd))]
18860pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18861 unsafe { transmute(src:vpcompressd256(a.as_i32x8(), src:i32x8::ZERO, mask:k)) }
18862}
18863
18864/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18865///
18866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18867#[inline]
18868#[target_feature(enable = "avx512f,avx512vl")]
18869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18870#[cfg_attr(test, assert_instr(vpcompressd))]
18871pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18872 unsafe { transmute(src:vpcompressd128(a.as_i32x4(), src.as_i32x4(), mask:k)) }
18873}
18874
18875/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18876///
18877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18878#[inline]
18879#[target_feature(enable = "avx512f,avx512vl")]
18880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18881#[cfg_attr(test, assert_instr(vpcompressd))]
18882pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18883 unsafe { transmute(src:vpcompressd128(a.as_i32x4(), src:i32x4::ZERO, mask:k)) }
18884}
18885
18886/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18887///
18888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18889#[inline]
18890#[target_feature(enable = "avx512f")]
18891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18892#[cfg_attr(test, assert_instr(vpcompressq))]
18893pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18894 unsafe { transmute(src:vpcompressq(a.as_i64x8(), src.as_i64x8(), mask:k)) }
18895}
18896
18897/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18898///
18899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18900#[inline]
18901#[target_feature(enable = "avx512f")]
18902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18903#[cfg_attr(test, assert_instr(vpcompressq))]
18904pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18905 unsafe { transmute(src:vpcompressq(a.as_i64x8(), src:i64x8::ZERO, mask:k)) }
18906}
18907
18908/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18909///
18910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18911#[inline]
18912#[target_feature(enable = "avx512f,avx512vl")]
18913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18914#[cfg_attr(test, assert_instr(vpcompressq))]
18915pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18916 unsafe { transmute(src:vpcompressq256(a.as_i64x4(), src.as_i64x4(), mask:k)) }
18917}
18918
18919/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18920///
18921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18922#[inline]
18923#[target_feature(enable = "avx512f,avx512vl")]
18924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18925#[cfg_attr(test, assert_instr(vpcompressq))]
18926pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18927 unsafe { transmute(src:vpcompressq256(a.as_i64x4(), src:i64x4::ZERO, mask:k)) }
18928}
18929
18930/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18931///
18932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18933#[inline]
18934#[target_feature(enable = "avx512f,avx512vl")]
18935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18936#[cfg_attr(test, assert_instr(vpcompressq))]
18937pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18938 unsafe { transmute(src:vpcompressq128(a.as_i64x2(), src.as_i64x2(), mask:k)) }
18939}
18940
18941/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18942///
18943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18944#[inline]
18945#[target_feature(enable = "avx512f,avx512vl")]
18946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18947#[cfg_attr(test, assert_instr(vpcompressq))]
18948pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18949 unsafe { transmute(src:vpcompressq128(a.as_i64x2(), src:i64x2::ZERO, mask:k)) }
18950}
18951
18952/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18953///
18954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18955#[inline]
18956#[target_feature(enable = "avx512f")]
18957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18958#[cfg_attr(test, assert_instr(vcompressps))]
18959pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18960 unsafe { transmute(src:vcompressps(a.as_f32x16(), src.as_f32x16(), mask:k)) }
18961}
18962
18963/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18964///
18965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18966#[inline]
18967#[target_feature(enable = "avx512f")]
18968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18969#[cfg_attr(test, assert_instr(vcompressps))]
18970pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18971 unsafe { transmute(src:vcompressps(a.as_f32x16(), src:f32x16::ZERO, mask:k)) }
18972}
18973
18974/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18975///
18976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18977#[inline]
18978#[target_feature(enable = "avx512f,avx512vl")]
18979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18980#[cfg_attr(test, assert_instr(vcompressps))]
18981pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18982 unsafe { transmute(src:vcompressps256(a.as_f32x8(), src.as_f32x8(), mask:k)) }
18983}
18984
18985/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18986///
18987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18988#[inline]
18989#[target_feature(enable = "avx512f,avx512vl")]
18990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18991#[cfg_attr(test, assert_instr(vcompressps))]
18992pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18993 unsafe { transmute(src:vcompressps256(a.as_f32x8(), src:f32x8::ZERO, mask:k)) }
18994}
18995
18996/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18997///
18998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18999#[inline]
19000#[target_feature(enable = "avx512f,avx512vl")]
19001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19002#[cfg_attr(test, assert_instr(vcompressps))]
19003pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19004 unsafe { transmute(src:vcompressps128(a.as_f32x4(), src.as_f32x4(), mask:k)) }
19005}
19006
19007/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19008///
19009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
19010#[inline]
19011#[target_feature(enable = "avx512f,avx512vl")]
19012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19013#[cfg_attr(test, assert_instr(vcompressps))]
19014pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
19015 unsafe { transmute(src:vcompressps128(a.as_f32x4(), src:f32x4::ZERO, mask:k)) }
19016}
19017
19018/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19019///
19020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
19021#[inline]
19022#[target_feature(enable = "avx512f")]
19023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19024#[cfg_attr(test, assert_instr(vcompresspd))]
19025pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19026 unsafe { transmute(src:vcompresspd(a.as_f64x8(), src.as_f64x8(), mask:k)) }
19027}
19028
19029/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19030///
19031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
19032#[inline]
19033#[target_feature(enable = "avx512f")]
19034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19035#[cfg_attr(test, assert_instr(vcompresspd))]
19036pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
19037 unsafe { transmute(src:vcompresspd(a.as_f64x8(), src:f64x8::ZERO, mask:k)) }
19038}
19039
19040/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19041///
19042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
19043#[inline]
19044#[target_feature(enable = "avx512f,avx512vl")]
19045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19046#[cfg_attr(test, assert_instr(vcompresspd))]
19047pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19048 unsafe { transmute(src:vcompresspd256(a.as_f64x4(), src.as_f64x4(), mask:k)) }
19049}
19050
19051/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19052///
19053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
19054#[inline]
19055#[target_feature(enable = "avx512f,avx512vl")]
19056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19057#[cfg_attr(test, assert_instr(vcompresspd))]
19058pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
19059 unsafe { transmute(src:vcompresspd256(a.as_f64x4(), src:f64x4::ZERO, mask:k)) }
19060}
19061
19062/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19063///
19064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
19065#[inline]
19066#[target_feature(enable = "avx512f,avx512vl")]
19067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19068#[cfg_attr(test, assert_instr(vcompresspd))]
19069pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19070 unsafe { transmute(src:vcompresspd128(a.as_f64x2(), src.as_f64x2(), mask:k)) }
19071}
19072
19073/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19074///
19075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
19076#[inline]
19077#[target_feature(enable = "avx512f,avx512vl")]
19078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19079#[cfg_attr(test, assert_instr(vcompresspd))]
19080pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
19081 unsafe { transmute(src:vcompresspd128(a.as_f64x2(), src:f64x2::ZERO, mask:k)) }
19082}
19083
19084/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19085///
19086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
19087#[inline]
19088#[target_feature(enable = "avx512f")]
19089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19090#[cfg_attr(test, assert_instr(vpcompressd))]
19091pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
19092 vcompressstored(mem:base_addr as *mut _, data:a.as_i32x16(), mask:k)
19093}
19094
19095/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19096///
19097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
19098#[inline]
19099#[target_feature(enable = "avx512f,avx512vl")]
19100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19101#[cfg_attr(test, assert_instr(vpcompressd))]
19102pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
19103 vcompressstored256(mem:base_addr as *mut _, data:a.as_i32x8(), mask:k)
19104}
19105
19106/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19107///
19108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
19109#[inline]
19110#[target_feature(enable = "avx512f,avx512vl")]
19111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19112#[cfg_attr(test, assert_instr(vpcompressd))]
19113pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
19114 vcompressstored128(mem:base_addr as *mut _, data:a.as_i32x4(), mask:k)
19115}
19116
19117/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19118///
19119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
19120#[inline]
19121#[target_feature(enable = "avx512f")]
19122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19123#[cfg_attr(test, assert_instr(vpcompressq))]
19124pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
19125 vcompressstoreq(mem:base_addr as *mut _, data:a.as_i64x8(), mask:k)
19126}
19127
19128/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19129///
19130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
19131#[inline]
19132#[target_feature(enable = "avx512f,avx512vl")]
19133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19134#[cfg_attr(test, assert_instr(vpcompressq))]
19135pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
19136 vcompressstoreq256(mem:base_addr as *mut _, data:a.as_i64x4(), mask:k)
19137}
19138
19139/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19140///
19141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
19142#[inline]
19143#[target_feature(enable = "avx512f,avx512vl")]
19144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19145#[cfg_attr(test, assert_instr(vpcompressq))]
19146pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
19147 vcompressstoreq128(mem:base_addr as *mut _, data:a.as_i64x2(), mask:k)
19148}
19149
19150/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19151///
19152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
19153#[inline]
19154#[target_feature(enable = "avx512f")]
19155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19156#[cfg_attr(test, assert_instr(vcompressps))]
19157pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
19158 vcompressstoreps(mem:base_addr as *mut _, data:a.as_f32x16(), mask:k)
19159}
19160
19161/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19162///
19163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
19164#[inline]
19165#[target_feature(enable = "avx512f,avx512vl")]
19166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19167#[cfg_attr(test, assert_instr(vcompressps))]
19168pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
19169 vcompressstoreps256(mem:base_addr as *mut _, data:a.as_f32x8(), mask:k)
19170}
19171
19172/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19173///
19174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
19175#[inline]
19176#[target_feature(enable = "avx512f,avx512vl")]
19177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19178#[cfg_attr(test, assert_instr(vcompressps))]
19179pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
19180 vcompressstoreps128(mem:base_addr as *mut _, data:a.as_f32x4(), mask:k)
19181}
19182
19183/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19184///
19185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
19186#[inline]
19187#[target_feature(enable = "avx512f")]
19188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19189#[cfg_attr(test, assert_instr(vcompresspd))]
19190pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
19191 vcompressstorepd(mem:base_addr as *mut _, data:a.as_f64x8(), mask:k)
19192}
19193
19194/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19195///
19196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
19197#[inline]
19198#[target_feature(enable = "avx512f,avx512vl")]
19199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19200#[cfg_attr(test, assert_instr(vcompresspd))]
19201pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
19202 vcompressstorepd256(mem:base_addr as *mut _, data:a.as_f64x4(), mask:k)
19203}
19204
19205/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19206///
19207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
19208#[inline]
19209#[target_feature(enable = "avx512f,avx512vl")]
19210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19211#[cfg_attr(test, assert_instr(vcompresspd))]
19212pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
19213 vcompressstorepd128(mem:base_addr as *mut _, data:a.as_f64x2(), mask:k)
19214}
19215
19216/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19217///
19218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
19219#[inline]
19220#[target_feature(enable = "avx512f")]
19221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19222#[cfg_attr(test, assert_instr(vpexpandd))]
19223pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19224 unsafe { transmute(src:vpexpandd(a.as_i32x16(), src.as_i32x16(), mask:k)) }
19225}
19226
19227/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19228///
19229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
19230#[inline]
19231#[target_feature(enable = "avx512f")]
19232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19233#[cfg_attr(test, assert_instr(vpexpandd))]
19234pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
19235 unsafe { transmute(src:vpexpandd(a.as_i32x16(), src:i32x16::ZERO, mask:k)) }
19236}
19237
19238/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19239///
19240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
19241#[inline]
19242#[target_feature(enable = "avx512f,avx512vl")]
19243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19244#[cfg_attr(test, assert_instr(vpexpandd))]
19245pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19246 unsafe { transmute(src:vpexpandd256(a.as_i32x8(), src.as_i32x8(), mask:k)) }
19247}
19248
19249/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19250///
19251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
19252#[inline]
19253#[target_feature(enable = "avx512f,avx512vl")]
19254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19255#[cfg_attr(test, assert_instr(vpexpandd))]
19256pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
19257 unsafe { transmute(src:vpexpandd256(a.as_i32x8(), src:i32x8::ZERO, mask:k)) }
19258}
19259
19260/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19261///
19262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
19263#[inline]
19264#[target_feature(enable = "avx512f,avx512vl")]
19265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19266#[cfg_attr(test, assert_instr(vpexpandd))]
19267pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19268 unsafe { transmute(src:vpexpandd128(a.as_i32x4(), src.as_i32x4(), mask:k)) }
19269}
19270
19271/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19272///
19273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
19274#[inline]
19275#[target_feature(enable = "avx512f,avx512vl")]
19276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19277#[cfg_attr(test, assert_instr(vpexpandd))]
19278pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
19279 unsafe { transmute(src:vpexpandd128(a.as_i32x4(), src:i32x4::ZERO, mask:k)) }
19280}
19281
19282/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19283///
19284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
19285#[inline]
19286#[target_feature(enable = "avx512f")]
19287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19288#[cfg_attr(test, assert_instr(vpexpandq))]
19289pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19290 unsafe { transmute(src:vpexpandq(a.as_i64x8(), src.as_i64x8(), mask:k)) }
19291}
19292
19293/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19294///
19295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
19296#[inline]
19297#[target_feature(enable = "avx512f")]
19298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19299#[cfg_attr(test, assert_instr(vpexpandq))]
19300pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
19301 unsafe { transmute(src:vpexpandq(a.as_i64x8(), src:i64x8::ZERO, mask:k)) }
19302}
19303
19304/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19305///
19306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
19307#[inline]
19308#[target_feature(enable = "avx512f,avx512vl")]
19309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19310#[cfg_attr(test, assert_instr(vpexpandq))]
19311pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19312 unsafe { transmute(src:vpexpandq256(a.as_i64x4(), src.as_i64x4(), mask:k)) }
19313}
19314
19315/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19316///
19317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
19318#[inline]
19319#[target_feature(enable = "avx512f,avx512vl")]
19320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19321#[cfg_attr(test, assert_instr(vpexpandq))]
19322pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
19323 unsafe { transmute(src:vpexpandq256(a.as_i64x4(), src:i64x4::ZERO, mask:k)) }
19324}
19325
19326/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19332#[cfg_attr(test, assert_instr(vpexpandq))]
19333pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19334 unsafe { transmute(src:vpexpandq128(a.as_i64x2(), src.as_i64x2(), mask:k)) }
19335}
19336
19337/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
19340#[inline]
19341#[target_feature(enable = "avx512f,avx512vl")]
19342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19343#[cfg_attr(test, assert_instr(vpexpandq))]
19344pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
19345 unsafe { transmute(src:vpexpandq128(a.as_i64x2(), src:i64x2::ZERO, mask:k)) }
19346}
19347
19348/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19349///
19350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
19351#[inline]
19352#[target_feature(enable = "avx512f")]
19353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19354#[cfg_attr(test, assert_instr(vexpandps))]
19355pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
19356 unsafe { transmute(src:vexpandps(a.as_f32x16(), src.as_f32x16(), mask:k)) }
19357}
19358
19359/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19360///
19361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
19362#[inline]
19363#[target_feature(enable = "avx512f")]
19364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19365#[cfg_attr(test, assert_instr(vexpandps))]
19366pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
19367 unsafe { transmute(src:vexpandps(a.as_f32x16(), src:f32x16::ZERO, mask:k)) }
19368}
19369
19370/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19371///
19372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
19373#[inline]
19374#[target_feature(enable = "avx512f,avx512vl")]
19375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19376#[cfg_attr(test, assert_instr(vexpandps))]
19377pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
19378 unsafe { transmute(src:vexpandps256(a.as_f32x8(), src.as_f32x8(), mask:k)) }
19379}
19380
19381/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19382///
19383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
19384#[inline]
19385#[target_feature(enable = "avx512f,avx512vl")]
19386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19387#[cfg_attr(test, assert_instr(vexpandps))]
19388pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
19389 unsafe { transmute(src:vexpandps256(a.as_f32x8(), src:f32x8::ZERO, mask:k)) }
19390}
19391
19392/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19393///
19394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
19395#[inline]
19396#[target_feature(enable = "avx512f,avx512vl")]
19397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19398#[cfg_attr(test, assert_instr(vexpandps))]
19399pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19400 unsafe { transmute(src:vexpandps128(a.as_f32x4(), src.as_f32x4(), mask:k)) }
19401}
19402
19403/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19404///
19405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
19406#[inline]
19407#[target_feature(enable = "avx512f,avx512vl")]
19408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19409#[cfg_attr(test, assert_instr(vexpandps))]
19410pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
19411 unsafe { transmute(src:vexpandps128(a.as_f32x4(), src:f32x4::ZERO, mask:k)) }
19412}
19413
19414/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19415///
19416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19417#[inline]
19418#[target_feature(enable = "avx512f")]
19419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19420#[cfg_attr(test, assert_instr(vexpandpd))]
19421pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19422 unsafe { transmute(src:vexpandpd(a.as_f64x8(), src.as_f64x8(), mask:k)) }
19423}
19424
19425/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19426///
19427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19428#[inline]
19429#[target_feature(enable = "avx512f")]
19430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19431#[cfg_attr(test, assert_instr(vexpandpd))]
19432pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
19433 unsafe { transmute(src:vexpandpd(a.as_f64x8(), src:f64x8::ZERO, mask:k)) }
19434}
19435
19436/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19437///
19438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19439#[inline]
19440#[target_feature(enable = "avx512f,avx512vl")]
19441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19442#[cfg_attr(test, assert_instr(vexpandpd))]
19443pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19444 unsafe { transmute(src:vexpandpd256(a.as_f64x4(), src.as_f64x4(), mask:k)) }
19445}
19446
19447/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19448///
19449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19450#[inline]
19451#[target_feature(enable = "avx512f,avx512vl")]
19452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19453#[cfg_attr(test, assert_instr(vexpandpd))]
19454pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
19455 unsafe { transmute(src:vexpandpd256(a.as_f64x4(), src:f64x4::ZERO, mask:k)) }
19456}
19457
19458/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19459///
19460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19461#[inline]
19462#[target_feature(enable = "avx512f,avx512vl")]
19463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19464#[cfg_attr(test, assert_instr(vexpandpd))]
19465pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19466 unsafe { transmute(src:vexpandpd128(a.as_f64x2(), src.as_f64x2(), mask:k)) }
19467}
19468
19469/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19470///
19471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19472#[inline]
19473#[target_feature(enable = "avx512f,avx512vl")]
19474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19475#[cfg_attr(test, assert_instr(vexpandpd))]
19476pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
19477 unsafe { transmute(src:vexpandpd128(a.as_f64x2(), src:f64x2::ZERO, mask:k)) }
19478}
19479
19480/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19481///
19482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19483#[inline]
19484#[target_feature(enable = "avx512f")]
19485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19486#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19487#[rustc_legacy_const_generics(1)]
19488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19489pub const fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19490 static_assert_uimm_bits!(IMM8, 8);
19491 _mm512_rolv_epi32(a, b:_mm512_set1_epi32(IMM8))
19492}
19493
19494/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19495///
19496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19497#[inline]
19498#[target_feature(enable = "avx512f")]
19499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19500#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19501#[rustc_legacy_const_generics(3)]
19502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19503pub const fn _mm512_mask_rol_epi32<const IMM8: i32>(
19504 src: __m512i,
19505 k: __mmask16,
19506 a: __m512i,
19507) -> __m512i {
19508 static_assert_uimm_bits!(IMM8, 8);
19509 _mm512_mask_rolv_epi32(src, k, a, b:_mm512_set1_epi32(IMM8))
19510}
19511
19512/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19513///
19514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19515#[inline]
19516#[target_feature(enable = "avx512f")]
19517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19518#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19519#[rustc_legacy_const_generics(2)]
19520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19521pub const fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19522 static_assert_uimm_bits!(IMM8, 8);
19523 _mm512_maskz_rolv_epi32(k, a, b:_mm512_set1_epi32(IMM8))
19524}
19525
19526/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19527///
19528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19529#[inline]
19530#[target_feature(enable = "avx512f,avx512vl")]
19531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19532#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19533#[rustc_legacy_const_generics(1)]
19534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19535pub const fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19536 static_assert_uimm_bits!(IMM8, 8);
19537 _mm256_rolv_epi32(a, b:_mm256_set1_epi32(IMM8))
19538}
19539
19540/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19541///
19542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19543#[inline]
19544#[target_feature(enable = "avx512f,avx512vl")]
19545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19546#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19547#[rustc_legacy_const_generics(3)]
19548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19549pub const fn _mm256_mask_rol_epi32<const IMM8: i32>(
19550 src: __m256i,
19551 k: __mmask8,
19552 a: __m256i,
19553) -> __m256i {
19554 static_assert_uimm_bits!(IMM8, 8);
19555 _mm256_mask_rolv_epi32(src, k, a, b:_mm256_set1_epi32(IMM8))
19556}
19557
19558/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19564#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19565#[rustc_legacy_const_generics(2)]
19566#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19567pub const fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19568 static_assert_uimm_bits!(IMM8, 8);
19569 _mm256_maskz_rolv_epi32(k, a, b:_mm256_set1_epi32(IMM8))
19570}
19571
19572/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19573///
19574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19575#[inline]
19576#[target_feature(enable = "avx512f,avx512vl")]
19577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19578#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19579#[rustc_legacy_const_generics(1)]
19580#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19581pub const fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19582 static_assert_uimm_bits!(IMM8, 8);
19583 _mm_rolv_epi32(a, b:_mm_set1_epi32(IMM8))
19584}
19585
19586/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19587///
19588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19589#[inline]
19590#[target_feature(enable = "avx512f,avx512vl")]
19591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19592#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19593#[rustc_legacy_const_generics(3)]
19594#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19595pub const fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19596 static_assert_uimm_bits!(IMM8, 8);
19597 _mm_mask_rolv_epi32(src, k, a, b:_mm_set1_epi32(IMM8))
19598}
19599
19600/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19601///
19602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19603#[inline]
19604#[target_feature(enable = "avx512f,avx512vl")]
19605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19606#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19607#[rustc_legacy_const_generics(2)]
19608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19609pub const fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19610 static_assert_uimm_bits!(IMM8, 8);
19611 _mm_maskz_rolv_epi32(k, a, b:_mm_set1_epi32(IMM8))
19612}
19613
19614/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19615///
19616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19617#[inline]
19618#[target_feature(enable = "avx512f")]
19619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19620#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19621#[rustc_legacy_const_generics(1)]
19622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19623pub const fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19624 static_assert_uimm_bits!(IMM8, 8);
19625 _mm512_rorv_epi32(a, b:_mm512_set1_epi32(IMM8))
19626}
19627
19628/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19634#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19635#[rustc_legacy_const_generics(3)]
19636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19637pub const fn _mm512_mask_ror_epi32<const IMM8: i32>(
19638 src: __m512i,
19639 k: __mmask16,
19640 a: __m512i,
19641) -> __m512i {
19642 static_assert_uimm_bits!(IMM8, 8);
19643 _mm512_mask_rorv_epi32(src, k, a, b:_mm512_set1_epi32(IMM8))
19644}
19645
19646/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19647///
19648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19649#[inline]
19650#[target_feature(enable = "avx512f")]
19651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19652#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19653#[rustc_legacy_const_generics(2)]
19654#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19655pub const fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19656 static_assert_uimm_bits!(IMM8, 8);
19657 _mm512_maskz_rorv_epi32(k, a, b:_mm512_set1_epi32(IMM8))
19658}
19659
19660/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19661///
19662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19663#[inline]
19664#[target_feature(enable = "avx512f,avx512vl")]
19665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19666#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19667#[rustc_legacy_const_generics(1)]
19668#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19669pub const fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19670 static_assert_uimm_bits!(IMM8, 8);
19671 _mm256_rorv_epi32(a, b:_mm256_set1_epi32(IMM8))
19672}
19673
19674/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19675///
19676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19677#[inline]
19678#[target_feature(enable = "avx512f,avx512vl")]
19679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19680#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19681#[rustc_legacy_const_generics(3)]
19682#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19683pub const fn _mm256_mask_ror_epi32<const IMM8: i32>(
19684 src: __m256i,
19685 k: __mmask8,
19686 a: __m256i,
19687) -> __m256i {
19688 static_assert_uimm_bits!(IMM8, 8);
19689 _mm256_mask_rorv_epi32(src, k, a, b:_mm256_set1_epi32(IMM8))
19690}
19691
19692/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19693///
19694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19695#[inline]
19696#[target_feature(enable = "avx512f,avx512vl")]
19697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19698#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19699#[rustc_legacy_const_generics(2)]
19700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19701pub const fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19702 static_assert_uimm_bits!(IMM8, 8);
19703 _mm256_maskz_rorv_epi32(k, a, b:_mm256_set1_epi32(IMM8))
19704}
19705
19706/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19707///
19708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19709#[inline]
19710#[target_feature(enable = "avx512f,avx512vl")]
19711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19712#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19713#[rustc_legacy_const_generics(1)]
19714#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19715pub const fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19716 static_assert_uimm_bits!(IMM8, 8);
19717 _mm_rorv_epi32(a, b:_mm_set1_epi32(IMM8))
19718}
19719
19720/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19721///
19722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19723#[inline]
19724#[target_feature(enable = "avx512f,avx512vl")]
19725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19726#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19727#[rustc_legacy_const_generics(3)]
19728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19729pub const fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19730 static_assert_uimm_bits!(IMM8, 8);
19731 _mm_mask_rorv_epi32(src, k, a, b:_mm_set1_epi32(IMM8))
19732}
19733
19734/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19735///
19736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19737#[inline]
19738#[target_feature(enable = "avx512f,avx512vl")]
19739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19740#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19741#[rustc_legacy_const_generics(2)]
19742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19743pub const fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19744 static_assert_uimm_bits!(IMM8, 8);
19745 _mm_maskz_rorv_epi32(k, a, b:_mm_set1_epi32(IMM8))
19746}
19747
19748/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19754#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19757pub const fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19758 static_assert_uimm_bits!(IMM8, 8);
19759 _mm512_rolv_epi64(a, b:_mm512_set1_epi64(IMM8 as i64))
19760}
19761
19762/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19765#[inline]
19766#[target_feature(enable = "avx512f")]
19767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19768#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19769#[rustc_legacy_const_generics(3)]
19770#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19771pub const fn _mm512_mask_rol_epi64<const IMM8: i32>(
19772 src: __m512i,
19773 k: __mmask8,
19774 a: __m512i,
19775) -> __m512i {
19776 static_assert_uimm_bits!(IMM8, 8);
19777 _mm512_mask_rolv_epi64(src, k, a, b:_mm512_set1_epi64(IMM8 as i64))
19778}
19779
19780/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19781///
19782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19783#[inline]
19784#[target_feature(enable = "avx512f")]
19785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19786#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19787#[rustc_legacy_const_generics(2)]
19788#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19789pub const fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19790 static_assert_uimm_bits!(IMM8, 8);
19791 _mm512_maskz_rolv_epi64(k, a, b:_mm512_set1_epi64(IMM8 as i64))
19792}
19793
19794/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19795///
19796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19797#[inline]
19798#[target_feature(enable = "avx512f,avx512vl")]
19799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19800#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19801#[rustc_legacy_const_generics(1)]
19802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19803pub const fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19804 static_assert_uimm_bits!(IMM8, 8);
19805 _mm256_rolv_epi64(a, b:_mm256_set1_epi64x(IMM8 as i64))
19806}
19807
19808/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19809///
19810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19811#[inline]
19812#[target_feature(enable = "avx512f,avx512vl")]
19813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19814#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19815#[rustc_legacy_const_generics(3)]
19816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19817pub const fn _mm256_mask_rol_epi64<const IMM8: i32>(
19818 src: __m256i,
19819 k: __mmask8,
19820 a: __m256i,
19821) -> __m256i {
19822 static_assert_uimm_bits!(IMM8, 8);
19823 _mm256_mask_rolv_epi64(src, k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19824}
19825
19826/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19827///
19828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19829#[inline]
19830#[target_feature(enable = "avx512f,avx512vl")]
19831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19832#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19833#[rustc_legacy_const_generics(2)]
19834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19835pub const fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19836 static_assert_uimm_bits!(IMM8, 8);
19837 _mm256_maskz_rolv_epi64(k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19838}
19839
19840/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19841///
19842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19843#[inline]
19844#[target_feature(enable = "avx512f,avx512vl")]
19845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19846#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19847#[rustc_legacy_const_generics(1)]
19848#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19849pub const fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19850 static_assert_uimm_bits!(IMM8, 8);
19851 _mm_rolv_epi64(a, b:_mm_set1_epi64x(IMM8 as i64))
19852}
19853
19854/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19855///
19856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19857#[inline]
19858#[target_feature(enable = "avx512f,avx512vl")]
19859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19860#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19861#[rustc_legacy_const_generics(3)]
19862#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19863pub const fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19864 static_assert_uimm_bits!(IMM8, 8);
19865 _mm_mask_rolv_epi64(src, k, a, b:_mm_set1_epi64x(IMM8 as i64))
19866}
19867
19868/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19869///
19870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19871#[inline]
19872#[target_feature(enable = "avx512f,avx512vl")]
19873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19874#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19875#[rustc_legacy_const_generics(2)]
19876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19877pub const fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19878 static_assert_uimm_bits!(IMM8, 8);
19879 _mm_maskz_rolv_epi64(k, a, b:_mm_set1_epi64x(IMM8 as i64))
19880}
19881
19882/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19883///
19884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19885#[inline]
19886#[target_feature(enable = "avx512f")]
19887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19888#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19889#[rustc_legacy_const_generics(1)]
19890#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19891pub const fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19892 static_assert_uimm_bits!(IMM8, 8);
19893 _mm512_rorv_epi64(a, b:_mm512_set1_epi64(IMM8 as i64))
19894}
19895
19896/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19897///
19898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19899#[inline]
19900#[target_feature(enable = "avx512f")]
19901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19902#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19903#[rustc_legacy_const_generics(3)]
19904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19905pub const fn _mm512_mask_ror_epi64<const IMM8: i32>(
19906 src: __m512i,
19907 k: __mmask8,
19908 a: __m512i,
19909) -> __m512i {
19910 static_assert_uimm_bits!(IMM8, 8);
19911 _mm512_mask_rorv_epi64(src, k, a, b:_mm512_set1_epi64(IMM8 as i64))
19912}
19913
19914/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19915///
19916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19917#[inline]
19918#[target_feature(enable = "avx512f")]
19919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19920#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19921#[rustc_legacy_const_generics(2)]
19922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19923pub const fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19924 static_assert_uimm_bits!(IMM8, 8);
19925 _mm512_maskz_rorv_epi64(k, a, b:_mm512_set1_epi64(IMM8 as i64))
19926}
19927
19928/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19929///
19930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19931#[inline]
19932#[target_feature(enable = "avx512f,avx512vl")]
19933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19934#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19935#[rustc_legacy_const_generics(1)]
19936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19937pub const fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19938 static_assert_uimm_bits!(IMM8, 8);
19939 _mm256_rorv_epi64(a, b:_mm256_set1_epi64x(IMM8 as i64))
19940}
19941
19942/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19943///
19944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19945#[inline]
19946#[target_feature(enable = "avx512f,avx512vl")]
19947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19948#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19949#[rustc_legacy_const_generics(3)]
19950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19951pub const fn _mm256_mask_ror_epi64<const IMM8: i32>(
19952 src: __m256i,
19953 k: __mmask8,
19954 a: __m256i,
19955) -> __m256i {
19956 static_assert_uimm_bits!(IMM8, 8);
19957 _mm256_mask_rorv_epi64(src, k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19958}
19959
19960/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19961///
19962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19963#[inline]
19964#[target_feature(enable = "avx512f,avx512vl")]
19965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19966#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19967#[rustc_legacy_const_generics(2)]
19968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19969pub const fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19970 static_assert_uimm_bits!(IMM8, 8);
19971 _mm256_maskz_rorv_epi64(k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19972}
19973
19974/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19975///
19976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19977#[inline]
19978#[target_feature(enable = "avx512f,avx512vl")]
19979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19980#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19981#[rustc_legacy_const_generics(1)]
19982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19983pub const fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19984 static_assert_uimm_bits!(IMM8, 8);
19985 _mm_rorv_epi64(a, b:_mm_set1_epi64x(IMM8 as i64))
19986}
19987
19988/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19989///
19990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19991#[inline]
19992#[target_feature(enable = "avx512f,avx512vl")]
19993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19994#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19995#[rustc_legacy_const_generics(3)]
19996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19997pub const fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19998 static_assert_uimm_bits!(IMM8, 8);
19999 _mm_mask_rorv_epi64(src, k, a, b:_mm_set1_epi64x(IMM8 as i64))
20000}
20001
20002/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20003///
20004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
20005#[inline]
20006#[target_feature(enable = "avx512f,avx512vl")]
20007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20008#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
20009#[rustc_legacy_const_generics(2)]
20010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20011pub const fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
20012 static_assert_uimm_bits!(IMM8, 8);
20013 _mm_maskz_rorv_epi64(k, a, b:_mm_set1_epi64x(IMM8 as i64))
20014}
20015
20016/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20017///
20018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
20019#[inline]
20020#[target_feature(enable = "avx512f")]
20021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20022#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20023#[rustc_legacy_const_generics(1)]
20024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20025pub const fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20026 unsafe {
20027 static_assert_uimm_bits!(IMM8, 8);
20028 if IMM8 >= 32 {
20029 _mm512_setzero_si512()
20030 } else {
20031 transmute(src:simd_shl(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8)))
20032 }
20033 }
20034}
20035
20036/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20037///
20038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
20039#[inline]
20040#[target_feature(enable = "avx512f")]
20041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20042#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20043#[rustc_legacy_const_generics(3)]
20044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20045pub const fn _mm512_mask_slli_epi32<const IMM8: u32>(
20046 src: __m512i,
20047 k: __mmask16,
20048 a: __m512i,
20049) -> __m512i {
20050 unsafe {
20051 static_assert_uimm_bits!(IMM8, 8);
20052 let shf: Simd = if IMM8 >= 32 {
20053 u32x16::ZERO
20054 } else {
20055 simd_shl(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8))
20056 };
20057 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u32x16()))
20058 }
20059}
20060
20061/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20062///
20063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
20064#[inline]
20065#[target_feature(enable = "avx512f")]
20066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20067#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20068#[rustc_legacy_const_generics(2)]
20069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20070pub const fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20071 unsafe {
20072 static_assert_uimm_bits!(IMM8, 8);
20073 if IMM8 >= 32 {
20074 _mm512_setzero_si512()
20075 } else {
20076 let shf: Simd = simd_shl(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8));
20077 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u32x16::ZERO))
20078 }
20079 }
20080}
20081
20082/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20083///
20084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
20085#[inline]
20086#[target_feature(enable = "avx512f,avx512vl")]
20087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20088#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20089#[rustc_legacy_const_generics(3)]
20090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20091pub const fn _mm256_mask_slli_epi32<const IMM8: u32>(
20092 src: __m256i,
20093 k: __mmask8,
20094 a: __m256i,
20095) -> __m256i {
20096 unsafe {
20097 static_assert_uimm_bits!(IMM8, 8);
20098 let r: Simd = if IMM8 >= 32 {
20099 u32x8::ZERO
20100 } else {
20101 simd_shl(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8))
20102 };
20103 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x8()))
20104 }
20105}
20106
20107/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20108///
20109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
20110#[inline]
20111#[target_feature(enable = "avx512f,avx512vl")]
20112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20113#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20114#[rustc_legacy_const_generics(2)]
20115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20116pub const fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20117 unsafe {
20118 static_assert_uimm_bits!(IMM8, 8);
20119 if IMM8 >= 32 {
20120 _mm256_setzero_si256()
20121 } else {
20122 let r: Simd = simd_shl(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8));
20123 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x8::ZERO))
20124 }
20125 }
20126}
20127
20128/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20129///
20130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
20131#[inline]
20132#[target_feature(enable = "avx512f,avx512vl")]
20133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20134#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20135#[rustc_legacy_const_generics(3)]
20136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20137pub const fn _mm_mask_slli_epi32<const IMM8: u32>(
20138 src: __m128i,
20139 k: __mmask8,
20140 a: __m128i,
20141) -> __m128i {
20142 unsafe {
20143 static_assert_uimm_bits!(IMM8, 8);
20144 let r: Simd = if IMM8 >= 32 {
20145 u32x4::ZERO
20146 } else {
20147 simd_shl(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8))
20148 };
20149 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x4()))
20150 }
20151}
20152
20153/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20154///
20155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
20156#[inline]
20157#[target_feature(enable = "avx512f,avx512vl")]
20158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20159#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20160#[rustc_legacy_const_generics(2)]
20161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20162pub const fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20163 unsafe {
20164 static_assert_uimm_bits!(IMM8, 8);
20165 if IMM8 >= 32 {
20166 _mm_setzero_si128()
20167 } else {
20168 let r: Simd = simd_shl(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8));
20169 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x4::ZERO))
20170 }
20171 }
20172}
20173
20174/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20175///
20176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
20177#[inline]
20178#[target_feature(enable = "avx512f")]
20179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20180#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20181#[rustc_legacy_const_generics(1)]
20182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20183pub const fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20184 unsafe {
20185 static_assert_uimm_bits!(IMM8, 8);
20186 if IMM8 >= 32 {
20187 _mm512_setzero_si512()
20188 } else {
20189 transmute(src:simd_shr(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8)))
20190 }
20191 }
20192}
20193
20194/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20195///
20196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
20197#[inline]
20198#[target_feature(enable = "avx512f")]
20199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20200#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20201#[rustc_legacy_const_generics(3)]
20202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20203pub const fn _mm512_mask_srli_epi32<const IMM8: u32>(
20204 src: __m512i,
20205 k: __mmask16,
20206 a: __m512i,
20207) -> __m512i {
20208 unsafe {
20209 static_assert_uimm_bits!(IMM8, 8);
20210 let shf: Simd = if IMM8 >= 32 {
20211 u32x16::ZERO
20212 } else {
20213 simd_shr(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8))
20214 };
20215 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u32x16()))
20216 }
20217}
20218
20219/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
20222#[inline]
20223#[target_feature(enable = "avx512f")]
20224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20225#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20228pub const fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20229 unsafe {
20230 static_assert_uimm_bits!(IMM8, 8);
20231 if IMM8 >= 32 {
20232 _mm512_setzero_si512()
20233 } else {
20234 let shf: Simd = simd_shr(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8));
20235 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u32x16::ZERO))
20236 }
20237 }
20238}
20239
20240/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20241///
20242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
20243#[inline]
20244#[target_feature(enable = "avx512f,avx512vl")]
20245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20246#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20247#[rustc_legacy_const_generics(3)]
20248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20249pub const fn _mm256_mask_srli_epi32<const IMM8: u32>(
20250 src: __m256i,
20251 k: __mmask8,
20252 a: __m256i,
20253) -> __m256i {
20254 unsafe {
20255 static_assert_uimm_bits!(IMM8, 8);
20256 let r: Simd = if IMM8 >= 32 {
20257 u32x8::ZERO
20258 } else {
20259 simd_shr(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8))
20260 };
20261 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x8()))
20262 }
20263}
20264
20265/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20266///
20267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
20268#[inline]
20269#[target_feature(enable = "avx512f,avx512vl")]
20270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20271#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20272#[rustc_legacy_const_generics(2)]
20273#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20274pub const fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20275 unsafe {
20276 static_assert_uimm_bits!(IMM8, 8);
20277 if IMM8 >= 32 {
20278 _mm256_setzero_si256()
20279 } else {
20280 let r: Simd = simd_shr(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8));
20281 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x8::ZERO))
20282 }
20283 }
20284}
20285
20286/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20287///
20288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
20289#[inline]
20290#[target_feature(enable = "avx512f,avx512vl")]
20291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20292#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20293#[rustc_legacy_const_generics(3)]
20294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20295pub const fn _mm_mask_srli_epi32<const IMM8: u32>(
20296 src: __m128i,
20297 k: __mmask8,
20298 a: __m128i,
20299) -> __m128i {
20300 unsafe {
20301 static_assert_uimm_bits!(IMM8, 8);
20302 let r: Simd = if IMM8 >= 32 {
20303 u32x4::ZERO
20304 } else {
20305 simd_shr(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8))
20306 };
20307 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x4()))
20308 }
20309}
20310
20311/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20312///
20313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
20314#[inline]
20315#[target_feature(enable = "avx512f,avx512vl")]
20316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20317#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20318#[rustc_legacy_const_generics(2)]
20319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20320pub const fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20321 unsafe {
20322 static_assert_uimm_bits!(IMM8, 8);
20323 if IMM8 >= 32 {
20324 _mm_setzero_si128()
20325 } else {
20326 let r: Simd = simd_shr(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8));
20327 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x4::ZERO))
20328 }
20329 }
20330}
20331
20332/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20333///
20334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
20335#[inline]
20336#[target_feature(enable = "avx512f")]
20337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20338#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20339#[rustc_legacy_const_generics(1)]
20340#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20341pub const fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20342 unsafe {
20343 static_assert_uimm_bits!(IMM8, 8);
20344 if IMM8 >= 64 {
20345 _mm512_setzero_si512()
20346 } else {
20347 transmute(src:simd_shl(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64)))
20348 }
20349 }
20350}
20351
20352/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20353///
20354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
20355#[inline]
20356#[target_feature(enable = "avx512f")]
20357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20358#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20359#[rustc_legacy_const_generics(3)]
20360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20361pub const fn _mm512_mask_slli_epi64<const IMM8: u32>(
20362 src: __m512i,
20363 k: __mmask8,
20364 a: __m512i,
20365) -> __m512i {
20366 unsafe {
20367 static_assert_uimm_bits!(IMM8, 8);
20368 let shf: Simd = if IMM8 >= 64 {
20369 u64x8::ZERO
20370 } else {
20371 simd_shl(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64))
20372 };
20373 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u64x8()))
20374 }
20375}
20376
20377/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20378///
20379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
20380#[inline]
20381#[target_feature(enable = "avx512f")]
20382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20383#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20384#[rustc_legacy_const_generics(2)]
20385#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20386pub const fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20387 unsafe {
20388 static_assert_uimm_bits!(IMM8, 8);
20389 if IMM8 >= 64 {
20390 _mm512_setzero_si512()
20391 } else {
20392 let shf: Simd = simd_shl(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64));
20393 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u64x8::ZERO))
20394 }
20395 }
20396}
20397
20398/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20399///
20400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
20401#[inline]
20402#[target_feature(enable = "avx512f,avx512vl")]
20403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20404#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20405#[rustc_legacy_const_generics(3)]
20406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20407pub const fn _mm256_mask_slli_epi64<const IMM8: u32>(
20408 src: __m256i,
20409 k: __mmask8,
20410 a: __m256i,
20411) -> __m256i {
20412 unsafe {
20413 static_assert_uimm_bits!(IMM8, 8);
20414 let r: Simd = if IMM8 >= 64 {
20415 u64x4::ZERO
20416 } else {
20417 simd_shl(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64))
20418 };
20419 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x4()))
20420 }
20421}
20422
20423/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20424///
20425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
20426#[inline]
20427#[target_feature(enable = "avx512f,avx512vl")]
20428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20429#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20430#[rustc_legacy_const_generics(2)]
20431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20432pub const fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20433 unsafe {
20434 static_assert_uimm_bits!(IMM8, 8);
20435 if IMM8 >= 64 {
20436 _mm256_setzero_si256()
20437 } else {
20438 let r: Simd = simd_shl(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64));
20439 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x4::ZERO))
20440 }
20441 }
20442}
20443
20444/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20445///
20446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20447#[inline]
20448#[target_feature(enable = "avx512f,avx512vl")]
20449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20450#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20451#[rustc_legacy_const_generics(3)]
20452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20453pub const fn _mm_mask_slli_epi64<const IMM8: u32>(
20454 src: __m128i,
20455 k: __mmask8,
20456 a: __m128i,
20457) -> __m128i {
20458 unsafe {
20459 static_assert_uimm_bits!(IMM8, 8);
20460 let r: Simd = if IMM8 >= 64 {
20461 u64x2::ZERO
20462 } else {
20463 simd_shl(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64))
20464 };
20465 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x2()))
20466 }
20467}
20468
20469/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20470///
20471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20472#[inline]
20473#[target_feature(enable = "avx512f,avx512vl")]
20474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20475#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20476#[rustc_legacy_const_generics(2)]
20477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20478pub const fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20479 unsafe {
20480 static_assert_uimm_bits!(IMM8, 8);
20481 if IMM8 >= 64 {
20482 _mm_setzero_si128()
20483 } else {
20484 let r: Simd = simd_shl(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64));
20485 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x2::ZERO))
20486 }
20487 }
20488}
20489
20490/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20491///
20492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20493#[inline]
20494#[target_feature(enable = "avx512f")]
20495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20496#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20497#[rustc_legacy_const_generics(1)]
20498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20499pub const fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20500 unsafe {
20501 static_assert_uimm_bits!(IMM8, 8);
20502 if IMM8 >= 64 {
20503 _mm512_setzero_si512()
20504 } else {
20505 transmute(src:simd_shr(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64)))
20506 }
20507 }
20508}
20509
20510/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20511///
20512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20513#[inline]
20514#[target_feature(enable = "avx512f")]
20515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20516#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20517#[rustc_legacy_const_generics(3)]
20518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20519pub const fn _mm512_mask_srli_epi64<const IMM8: u32>(
20520 src: __m512i,
20521 k: __mmask8,
20522 a: __m512i,
20523) -> __m512i {
20524 unsafe {
20525 static_assert_uimm_bits!(IMM8, 8);
20526 let shf: Simd = if IMM8 >= 64 {
20527 u64x8::ZERO
20528 } else {
20529 simd_shr(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64))
20530 };
20531 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u64x8()))
20532 }
20533}
20534
20535/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20536///
20537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20538#[inline]
20539#[target_feature(enable = "avx512f")]
20540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20541#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20542#[rustc_legacy_const_generics(2)]
20543#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20544pub const fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20545 unsafe {
20546 static_assert_uimm_bits!(IMM8, 8);
20547 if IMM8 >= 64 {
20548 _mm512_setzero_si512()
20549 } else {
20550 let shf: Simd = simd_shr(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64));
20551 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u64x8::ZERO))
20552 }
20553 }
20554}
20555
20556/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20559#[inline]
20560#[target_feature(enable = "avx512f,avx512vl")]
20561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20562#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20563#[rustc_legacy_const_generics(3)]
20564#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20565pub const fn _mm256_mask_srli_epi64<const IMM8: u32>(
20566 src: __m256i,
20567 k: __mmask8,
20568 a: __m256i,
20569) -> __m256i {
20570 unsafe {
20571 static_assert_uimm_bits!(IMM8, 8);
20572 let r: Simd = if IMM8 >= 64 {
20573 u64x4::ZERO
20574 } else {
20575 simd_shr(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64))
20576 };
20577 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x4()))
20578 }
20579}
20580
20581/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20582///
20583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20584#[inline]
20585#[target_feature(enable = "avx512f,avx512vl")]
20586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20587#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20588#[rustc_legacy_const_generics(2)]
20589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20590pub const fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20591 unsafe {
20592 static_assert_uimm_bits!(IMM8, 8);
20593 if IMM8 >= 64 {
20594 _mm256_setzero_si256()
20595 } else {
20596 let r: Simd = simd_shr(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64));
20597 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x4::ZERO))
20598 }
20599 }
20600}
20601
20602/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20603///
20604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20605#[inline]
20606#[target_feature(enable = "avx512f,avx512vl")]
20607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20608#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20609#[rustc_legacy_const_generics(3)]
20610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20611pub const fn _mm_mask_srli_epi64<const IMM8: u32>(
20612 src: __m128i,
20613 k: __mmask8,
20614 a: __m128i,
20615) -> __m128i {
20616 unsafe {
20617 static_assert_uimm_bits!(IMM8, 8);
20618 let r: Simd = if IMM8 >= 64 {
20619 u64x2::ZERO
20620 } else {
20621 simd_shr(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64))
20622 };
20623 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x2()))
20624 }
20625}
20626
20627/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20628///
20629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20630#[inline]
20631#[target_feature(enable = "avx512f,avx512vl")]
20632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20633#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20634#[rustc_legacy_const_generics(2)]
20635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20636pub const fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20637 unsafe {
20638 static_assert_uimm_bits!(IMM8, 8);
20639 if IMM8 >= 64 {
20640 _mm_setzero_si128()
20641 } else {
20642 let r: Simd = simd_shr(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64));
20643 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x2::ZERO))
20644 }
20645 }
20646}
20647
20648/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20649///
20650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20651#[inline]
20652#[target_feature(enable = "avx512f")]
20653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20654#[cfg_attr(test, assert_instr(vpslld))]
20655pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20656 unsafe { transmute(src:vpslld(a.as_i32x16(), count.as_i32x4())) }
20657}
20658
20659/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20660///
20661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20662#[inline]
20663#[target_feature(enable = "avx512f")]
20664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20665#[cfg_attr(test, assert_instr(vpslld))]
20666pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20667 unsafe {
20668 let shf: Simd = _mm512_sll_epi32(a, count).as_i32x16();
20669 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
20670 }
20671}
20672
20673/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20674///
20675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20676#[inline]
20677#[target_feature(enable = "avx512f")]
20678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20679#[cfg_attr(test, assert_instr(vpslld))]
20680pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20681 unsafe {
20682 let shf: Simd = _mm512_sll_epi32(a, count).as_i32x16();
20683 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
20684 }
20685}
20686
20687/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20688///
20689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20690#[inline]
20691#[target_feature(enable = "avx512f,avx512vl")]
20692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20693#[cfg_attr(test, assert_instr(vpslld))]
20694pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20695 unsafe {
20696 let shf: Simd = _mm256_sll_epi32(a, count).as_i32x8();
20697 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
20698 }
20699}
20700
20701/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20702///
20703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20704#[inline]
20705#[target_feature(enable = "avx512f,avx512vl")]
20706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20707#[cfg_attr(test, assert_instr(vpslld))]
20708pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20709 unsafe {
20710 let shf: Simd = _mm256_sll_epi32(a, count).as_i32x8();
20711 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
20712 }
20713}
20714
20715/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20716///
20717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20718#[inline]
20719#[target_feature(enable = "avx512f,avx512vl")]
20720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20721#[cfg_attr(test, assert_instr(vpslld))]
20722pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20723 unsafe {
20724 let shf: Simd = _mm_sll_epi32(a, count).as_i32x4();
20725 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
20726 }
20727}
20728
20729/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20735#[cfg_attr(test, assert_instr(vpslld))]
20736pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737 unsafe {
20738 let shf: Simd = _mm_sll_epi32(a, count).as_i32x4();
20739 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
20740 }
20741}
20742
20743/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20746#[inline]
20747#[target_feature(enable = "avx512f")]
20748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20749#[cfg_attr(test, assert_instr(vpsrld))]
20750pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20751 unsafe { transmute(src:vpsrld(a.as_i32x16(), count.as_i32x4())) }
20752}
20753
20754/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20755///
20756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20757#[inline]
20758#[target_feature(enable = "avx512f")]
20759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20760#[cfg_attr(test, assert_instr(vpsrld))]
20761pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20762 unsafe {
20763 let shf: Simd = _mm512_srl_epi32(a, count).as_i32x16();
20764 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
20765 }
20766}
20767
20768/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20769///
20770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20771#[inline]
20772#[target_feature(enable = "avx512f")]
20773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20774#[cfg_attr(test, assert_instr(vpsrld))]
20775pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20776 unsafe {
20777 let shf: Simd = _mm512_srl_epi32(a, count).as_i32x16();
20778 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
20779 }
20780}
20781
20782/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20783///
20784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20785#[inline]
20786#[target_feature(enable = "avx512f,avx512vl")]
20787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20788#[cfg_attr(test, assert_instr(vpsrld))]
20789pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20790 unsafe {
20791 let shf: Simd = _mm256_srl_epi32(a, count).as_i32x8();
20792 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
20793 }
20794}
20795
20796/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20797///
20798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20799#[inline]
20800#[target_feature(enable = "avx512f,avx512vl")]
20801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20802#[cfg_attr(test, assert_instr(vpsrld))]
20803pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20804 unsafe {
20805 let shf: Simd = _mm256_srl_epi32(a, count).as_i32x8();
20806 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
20807 }
20808}
20809
20810/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20811///
20812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20813#[inline]
20814#[target_feature(enable = "avx512f,avx512vl")]
20815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20816#[cfg_attr(test, assert_instr(vpsrld))]
20817pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20818 unsafe {
20819 let shf: Simd = _mm_srl_epi32(a, count).as_i32x4();
20820 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
20821 }
20822}
20823
20824/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20825///
20826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20827#[inline]
20828#[target_feature(enable = "avx512f,avx512vl")]
20829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20830#[cfg_attr(test, assert_instr(vpsrld))]
20831pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20832 unsafe {
20833 let shf: Simd = _mm_srl_epi32(a, count).as_i32x4();
20834 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
20835 }
20836}
20837
20838/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20839///
20840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20841#[inline]
20842#[target_feature(enable = "avx512f")]
20843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20844#[cfg_attr(test, assert_instr(vpsllq))]
20845pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20846 unsafe { transmute(src:vpsllq(a.as_i64x8(), count.as_i64x2())) }
20847}
20848
20849/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20852#[inline]
20853#[target_feature(enable = "avx512f")]
20854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20855#[cfg_attr(test, assert_instr(vpsllq))]
20856pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20857 unsafe {
20858 let shf: Simd = _mm512_sll_epi64(a, count).as_i64x8();
20859 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
20860 }
20861}
20862
20863/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20864///
20865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20866#[inline]
20867#[target_feature(enable = "avx512f")]
20868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20869#[cfg_attr(test, assert_instr(vpsllq))]
20870pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20871 unsafe {
20872 let shf: Simd = _mm512_sll_epi64(a, count).as_i64x8();
20873 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
20874 }
20875}
20876
20877/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20878///
20879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20880#[inline]
20881#[target_feature(enable = "avx512f,avx512vl")]
20882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20883#[cfg_attr(test, assert_instr(vpsllq))]
20884pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20885 unsafe {
20886 let shf: Simd = _mm256_sll_epi64(a, count).as_i64x4();
20887 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
20888 }
20889}
20890
20891/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20892///
20893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20894#[inline]
20895#[target_feature(enable = "avx512f,avx512vl")]
20896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20897#[cfg_attr(test, assert_instr(vpsllq))]
20898pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20899 unsafe {
20900 let shf: Simd = _mm256_sll_epi64(a, count).as_i64x4();
20901 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
20902 }
20903}
20904
20905/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20906///
20907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20908#[inline]
20909#[target_feature(enable = "avx512f,avx512vl")]
20910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20911#[cfg_attr(test, assert_instr(vpsllq))]
20912pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20913 unsafe {
20914 let shf: Simd = _mm_sll_epi64(a, count).as_i64x2();
20915 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
20916 }
20917}
20918
20919/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20920///
20921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20922#[inline]
20923#[target_feature(enable = "avx512f,avx512vl")]
20924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20925#[cfg_attr(test, assert_instr(vpsllq))]
20926pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20927 unsafe {
20928 let shf: Simd = _mm_sll_epi64(a, count).as_i64x2();
20929 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
20930 }
20931}
20932
20933/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20934///
20935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
20936#[inline]
20937#[target_feature(enable = "avx512f")]
20938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20939#[cfg_attr(test, assert_instr(vpsrlq))]
20940pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20941 unsafe { transmute(src:vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20942}
20943
20944/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20945///
20946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20947#[inline]
20948#[target_feature(enable = "avx512f")]
20949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20950#[cfg_attr(test, assert_instr(vpsrlq))]
20951pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20952 unsafe {
20953 let shf: Simd = _mm512_srl_epi64(a, count).as_i64x8();
20954 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
20955 }
20956}
20957
20958/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20961#[inline]
20962#[target_feature(enable = "avx512f")]
20963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20964#[cfg_attr(test, assert_instr(vpsrlq))]
20965pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20966 unsafe {
20967 let shf: Simd = _mm512_srl_epi64(a, count).as_i64x8();
20968 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
20969 }
20970}
20971
20972/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20973///
20974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20975#[inline]
20976#[target_feature(enable = "avx512f,avx512vl")]
20977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20978#[cfg_attr(test, assert_instr(vpsrlq))]
20979pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20980 unsafe {
20981 let shf: Simd = _mm256_srl_epi64(a, count).as_i64x4();
20982 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
20983 }
20984}
20985
20986/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20987///
20988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20989#[inline]
20990#[target_feature(enable = "avx512f,avx512vl")]
20991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20992#[cfg_attr(test, assert_instr(vpsrlq))]
20993pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20994 unsafe {
20995 let shf: Simd = _mm256_srl_epi64(a, count).as_i64x4();
20996 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
20997 }
20998}
20999
21000/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21006#[cfg_attr(test, assert_instr(vpsrlq))]
21007pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21008 unsafe {
21009 let shf: Simd = _mm_srl_epi64(a, count).as_i64x2();
21010 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21011 }
21012}
21013
21014/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21015///
21016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
21017#[inline]
21018#[target_feature(enable = "avx512f,avx512vl")]
21019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21020#[cfg_attr(test, assert_instr(vpsrlq))]
21021pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21022 unsafe {
21023 let shf: Simd = _mm_srl_epi64(a, count).as_i64x2();
21024 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21025 }
21026}
21027
21028/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21029///
21030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
21031#[inline]
21032#[target_feature(enable = "avx512f")]
21033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21034#[cfg_attr(test, assert_instr(vpsrad))]
21035pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
21036 unsafe { transmute(src:vpsrad(a.as_i32x16(), count.as_i32x4())) }
21037}
21038
21039/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21040///
21041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
21042#[inline]
21043#[target_feature(enable = "avx512f")]
21044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21045#[cfg_attr(test, assert_instr(vpsrad))]
21046pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21047 unsafe {
21048 let shf: Simd = _mm512_sra_epi32(a, count).as_i32x16();
21049 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
21050 }
21051}
21052
21053/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21054///
21055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
21056#[inline]
21057#[target_feature(enable = "avx512f")]
21058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21059#[cfg_attr(test, assert_instr(vpsrad))]
21060pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21061 unsafe {
21062 let shf: Simd = _mm512_sra_epi32(a, count).as_i32x16();
21063 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
21064 }
21065}
21066
21067/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21068///
21069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
21070#[inline]
21071#[target_feature(enable = "avx512f,avx512vl")]
21072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21073#[cfg_attr(test, assert_instr(vpsrad))]
21074pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21075 unsafe {
21076 let shf: Simd = _mm256_sra_epi32(a, count).as_i32x8();
21077 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
21078 }
21079}
21080
21081/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21082///
21083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
21084#[inline]
21085#[target_feature(enable = "avx512f,avx512vl")]
21086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21087#[cfg_attr(test, assert_instr(vpsrad))]
21088pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21089 unsafe {
21090 let shf: Simd = _mm256_sra_epi32(a, count).as_i32x8();
21091 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
21092 }
21093}
21094
21095/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21096///
21097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
21098#[inline]
21099#[target_feature(enable = "avx512f,avx512vl")]
21100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21101#[cfg_attr(test, assert_instr(vpsrad))]
21102pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21103 unsafe {
21104 let shf: Simd = _mm_sra_epi32(a, count).as_i32x4();
21105 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
21106 }
21107}
21108
21109/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21110///
21111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
21112#[inline]
21113#[target_feature(enable = "avx512f,avx512vl")]
21114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21115#[cfg_attr(test, assert_instr(vpsrad))]
21116pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21117 unsafe {
21118 let shf: Simd = _mm_sra_epi32(a, count).as_i32x4();
21119 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
21120 }
21121}
21122
21123/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21124///
21125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
21126#[inline]
21127#[target_feature(enable = "avx512f")]
21128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21129#[cfg_attr(test, assert_instr(vpsraq))]
21130pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
21131 unsafe { transmute(src:vpsraq(a.as_i64x8(), count.as_i64x2())) }
21132}
21133
21134/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21135///
21136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
21137#[inline]
21138#[target_feature(enable = "avx512f")]
21139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21140#[cfg_attr(test, assert_instr(vpsraq))]
21141pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21142 unsafe {
21143 let shf: Simd = _mm512_sra_epi64(a, count).as_i64x8();
21144 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
21145 }
21146}
21147
21148/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21149///
21150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
21151#[inline]
21152#[target_feature(enable = "avx512f")]
21153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21154#[cfg_attr(test, assert_instr(vpsraq))]
21155pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21156 unsafe {
21157 let shf: Simd = _mm512_sra_epi64(a, count).as_i64x8();
21158 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
21159 }
21160}
21161
21162/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21163///
21164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
21165#[inline]
21166#[target_feature(enable = "avx512f,avx512vl")]
21167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21168#[cfg_attr(test, assert_instr(vpsraq))]
21169pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
21170 unsafe { transmute(src:vpsraq256(a.as_i64x4(), count.as_i64x2())) }
21171}
21172
21173/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21174///
21175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
21176#[inline]
21177#[target_feature(enable = "avx512f,avx512vl")]
21178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21179#[cfg_attr(test, assert_instr(vpsraq))]
21180pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21181 unsafe {
21182 let shf: Simd = _mm256_sra_epi64(a, count).as_i64x4();
21183 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
21184 }
21185}
21186
21187/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21188///
21189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
21190#[inline]
21191#[target_feature(enable = "avx512f,avx512vl")]
21192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21193#[cfg_attr(test, assert_instr(vpsraq))]
21194pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21195 unsafe {
21196 let shf: Simd = _mm256_sra_epi64(a, count).as_i64x4();
21197 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
21198 }
21199}
21200
21201/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21202///
21203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
21204#[inline]
21205#[target_feature(enable = "avx512f,avx512vl")]
21206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21207#[cfg_attr(test, assert_instr(vpsraq))]
21208pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
21209 unsafe { transmute(src:vpsraq128(a.as_i64x2(), count.as_i64x2())) }
21210}
21211
21212/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21213///
21214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
21215#[inline]
21216#[target_feature(enable = "avx512f,avx512vl")]
21217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21218#[cfg_attr(test, assert_instr(vpsraq))]
21219pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21220 unsafe {
21221 let shf: Simd = _mm_sra_epi64(a, count).as_i64x2();
21222 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21223 }
21224}
21225
21226/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21227///
21228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
21229#[inline]
21230#[target_feature(enable = "avx512f,avx512vl")]
21231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21232#[cfg_attr(test, assert_instr(vpsraq))]
21233pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21234 unsafe {
21235 let shf: Simd = _mm_sra_epi64(a, count).as_i64x2();
21236 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21237 }
21238}
21239
21240/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21241///
21242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
21243#[inline]
21244#[target_feature(enable = "avx512f")]
21245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21246#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21247#[rustc_legacy_const_generics(1)]
21248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21249pub const fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
21250 unsafe {
21251 static_assert_uimm_bits!(IMM8, 8);
21252 transmute(src:simd_shr(lhs:a.as_i32x16(), rhs:i32x16::splat(IMM8.min(31) as i32)))
21253 }
21254}
21255
21256/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
21259#[inline]
21260#[target_feature(enable = "avx512f")]
21261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21262#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21263#[rustc_legacy_const_generics(3)]
21264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21265pub const fn _mm512_mask_srai_epi32<const IMM8: u32>(
21266 src: __m512i,
21267 k: __mmask16,
21268 a: __m512i,
21269) -> __m512i {
21270 unsafe {
21271 static_assert_uimm_bits!(IMM8, 8);
21272 let r: Simd = simd_shr(lhs:a.as_i32x16(), rhs:i32x16::splat(IMM8.min(31) as i32));
21273 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
21274 }
21275}
21276
21277/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
21280#[inline]
21281#[target_feature(enable = "avx512f")]
21282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21283#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21284#[rustc_legacy_const_generics(2)]
21285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21286pub const fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
21287 unsafe {
21288 static_assert_uimm_bits!(IMM8, 8);
21289 let r: Simd = simd_shr(lhs:a.as_i32x16(), rhs:i32x16::splat(IMM8.min(31) as i32));
21290 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
21291 }
21292}
21293
21294/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21295///
21296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
21297#[inline]
21298#[target_feature(enable = "avx512f,avx512vl")]
21299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21300#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21301#[rustc_legacy_const_generics(3)]
21302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21303pub const fn _mm256_mask_srai_epi32<const IMM8: u32>(
21304 src: __m256i,
21305 k: __mmask8,
21306 a: __m256i,
21307) -> __m256i {
21308 unsafe {
21309 let r: Simd = simd_shr(lhs:a.as_i32x8(), rhs:i32x8::splat(IMM8.min(31) as i32));
21310 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
21311 }
21312}
21313
21314/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21315///
21316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
21317#[inline]
21318#[target_feature(enable = "avx512f,avx512vl")]
21319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21320#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21321#[rustc_legacy_const_generics(2)]
21322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21323pub const fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21324 unsafe {
21325 let r: Simd = simd_shr(lhs:a.as_i32x8(), rhs:i32x8::splat(IMM8.min(31) as i32));
21326 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
21327 }
21328}
21329
21330/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21336#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21337#[rustc_legacy_const_generics(3)]
21338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21339pub const fn _mm_mask_srai_epi32<const IMM8: u32>(
21340 src: __m128i,
21341 k: __mmask8,
21342 a: __m128i,
21343) -> __m128i {
21344 unsafe {
21345 let r: Simd = simd_shr(lhs:a.as_i32x4(), rhs:i32x4::splat(IMM8.min(31) as i32));
21346 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
21347 }
21348}
21349
21350/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21351///
21352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
21353#[inline]
21354#[target_feature(enable = "avx512f,avx512vl")]
21355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21356#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21357#[rustc_legacy_const_generics(2)]
21358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21359pub const fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21360 unsafe {
21361 let r: Simd = simd_shr(lhs:a.as_i32x4(), rhs:i32x4::splat(IMM8.min(31) as i32));
21362 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
21363 }
21364}
21365
21366/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21367///
21368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
21369#[inline]
21370#[target_feature(enable = "avx512f")]
21371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21372#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21373#[rustc_legacy_const_generics(1)]
21374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21375pub const fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
21376 unsafe {
21377 static_assert_uimm_bits!(IMM8, 8);
21378 transmute(src:simd_shr(lhs:a.as_i64x8(), rhs:i64x8::splat(IMM8.min(63) as i64)))
21379 }
21380}
21381
21382/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21383///
21384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
21385#[inline]
21386#[target_feature(enable = "avx512f")]
21387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21388#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21389#[rustc_legacy_const_generics(3)]
21390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21391pub const fn _mm512_mask_srai_epi64<const IMM8: u32>(
21392 src: __m512i,
21393 k: __mmask8,
21394 a: __m512i,
21395) -> __m512i {
21396 unsafe {
21397 static_assert_uimm_bits!(IMM8, 8);
21398 let shf: Simd = simd_shr(lhs:a.as_i64x8(), rhs:i64x8::splat(IMM8.min(63) as i64));
21399 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
21400 }
21401}
21402
21403/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21404///
21405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
21406#[inline]
21407#[target_feature(enable = "avx512f")]
21408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21409#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21410#[rustc_legacy_const_generics(2)]
21411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21412pub const fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
21413 unsafe {
21414 static_assert_uimm_bits!(IMM8, 8);
21415 let shf: Simd = simd_shr(lhs:a.as_i64x8(), rhs:i64x8::splat(IMM8.min(63) as i64));
21416 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
21417 }
21418}
21419
21420/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21421///
21422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
21423#[inline]
21424#[target_feature(enable = "avx512f,avx512vl")]
21425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21426#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21427#[rustc_legacy_const_generics(1)]
21428#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21429pub const fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
21430 unsafe {
21431 static_assert_uimm_bits!(IMM8, 8);
21432 transmute(src:simd_shr(lhs:a.as_i64x4(), rhs:i64x4::splat(IMM8.min(63) as i64)))
21433 }
21434}
21435
21436/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21437///
21438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21439#[inline]
21440#[target_feature(enable = "avx512f,avx512vl")]
21441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21442#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21443#[rustc_legacy_const_generics(3)]
21444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21445pub const fn _mm256_mask_srai_epi64<const IMM8: u32>(
21446 src: __m256i,
21447 k: __mmask8,
21448 a: __m256i,
21449) -> __m256i {
21450 unsafe {
21451 static_assert_uimm_bits!(IMM8, 8);
21452 let shf: Simd = simd_shr(lhs:a.as_i64x4(), rhs:i64x4::splat(IMM8.min(63) as i64));
21453 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
21454 }
21455}
21456
21457/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21458///
21459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21460#[inline]
21461#[target_feature(enable = "avx512f,avx512vl")]
21462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21463#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21464#[rustc_legacy_const_generics(2)]
21465#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21466pub const fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21467 unsafe {
21468 static_assert_uimm_bits!(IMM8, 8);
21469 let shf: Simd = simd_shr(lhs:a.as_i64x4(), rhs:i64x4::splat(IMM8.min(63) as i64));
21470 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
21471 }
21472}
21473
21474/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21475///
21476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
21477#[inline]
21478#[target_feature(enable = "avx512f,avx512vl")]
21479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21480#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21481#[rustc_legacy_const_generics(1)]
21482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21483pub const fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
21484 unsafe {
21485 static_assert_uimm_bits!(IMM8, 8);
21486 transmute(src:simd_shr(lhs:a.as_i64x2(), rhs:i64x2::splat(IMM8.min(63) as i64)))
21487 }
21488}
21489
21490/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
21493#[inline]
21494#[target_feature(enable = "avx512f,avx512vl")]
21495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21496#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21497#[rustc_legacy_const_generics(3)]
21498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21499pub const fn _mm_mask_srai_epi64<const IMM8: u32>(
21500 src: __m128i,
21501 k: __mmask8,
21502 a: __m128i,
21503) -> __m128i {
21504 unsafe {
21505 static_assert_uimm_bits!(IMM8, 8);
21506 let shf: Simd = simd_shr(lhs:a.as_i64x2(), rhs:i64x2::splat(IMM8.min(63) as i64));
21507 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21508 }
21509}
21510
21511/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21512///
21513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21514#[inline]
21515#[target_feature(enable = "avx512f,avx512vl")]
21516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21517#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21518#[rustc_legacy_const_generics(2)]
21519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21520pub const fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21521 unsafe {
21522 static_assert_uimm_bits!(IMM8, 8);
21523 let shf: Simd = simd_shr(lhs:a.as_i64x2(), rhs:i64x2::splat(IMM8.min(63) as i64));
21524 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21525 }
21526}
21527
21528/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21529///
21530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
21531#[inline]
21532#[target_feature(enable = "avx512f")]
21533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21534#[cfg_attr(test, assert_instr(vpsravd))]
21535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21536pub const fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21537 unsafe {
21538 let count: Simd = count.as_u32x16();
21539 let no_overflow: u32x16 = simd_lt(x:count, y:u32x16::splat(u32::BITS));
21540 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i32x16::splat(31));
21541 simd_shr(lhs:a.as_i32x16(), rhs:count).as_m512i()
21542 }
21543}
21544
21545/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21546///
21547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21548#[inline]
21549#[target_feature(enable = "avx512f")]
21550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21551#[cfg_attr(test, assert_instr(vpsravd))]
21552#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21553pub const fn _mm512_mask_srav_epi32(
21554 src: __m512i,
21555 k: __mmask16,
21556 a: __m512i,
21557 count: __m512i,
21558) -> __m512i {
21559 unsafe {
21560 let shf: Simd = _mm512_srav_epi32(a, count).as_i32x16();
21561 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
21562 }
21563}
21564
21565/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21566///
21567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21568#[inline]
21569#[target_feature(enable = "avx512f")]
21570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21571#[cfg_attr(test, assert_instr(vpsravd))]
21572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21573pub const fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21574 unsafe {
21575 let shf: Simd = _mm512_srav_epi32(a, count).as_i32x16();
21576 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
21577 }
21578}
21579
21580/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21581///
21582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21583#[inline]
21584#[target_feature(enable = "avx512f,avx512vl")]
21585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21586#[cfg_attr(test, assert_instr(vpsravd))]
21587#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21588pub const fn _mm256_mask_srav_epi32(
21589 src: __m256i,
21590 k: __mmask8,
21591 a: __m256i,
21592 count: __m256i,
21593) -> __m256i {
21594 unsafe {
21595 let shf: Simd = _mm256_srav_epi32(a, count).as_i32x8();
21596 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
21597 }
21598}
21599
21600/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21601///
21602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21603#[inline]
21604#[target_feature(enable = "avx512f,avx512vl")]
21605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21606#[cfg_attr(test, assert_instr(vpsravd))]
21607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21608pub const fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21609 unsafe {
21610 let shf: Simd = _mm256_srav_epi32(a, count).as_i32x8();
21611 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
21612 }
21613}
21614
21615/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21616///
21617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21618#[inline]
21619#[target_feature(enable = "avx512f,avx512vl")]
21620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21621#[cfg_attr(test, assert_instr(vpsravd))]
21622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21623pub const fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21624 unsafe {
21625 let shf: Simd = _mm_srav_epi32(a, count).as_i32x4();
21626 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
21627 }
21628}
21629
21630/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21631///
21632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21633#[inline]
21634#[target_feature(enable = "avx512f,avx512vl")]
21635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21636#[cfg_attr(test, assert_instr(vpsravd))]
21637#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21638pub const fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21639 unsafe {
21640 let shf: Simd = _mm_srav_epi32(a, count).as_i32x4();
21641 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
21642 }
21643}
21644
21645/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21646///
21647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21648#[inline]
21649#[target_feature(enable = "avx512f")]
21650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21651#[cfg_attr(test, assert_instr(vpsravq))]
21652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21653pub const fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21654 unsafe {
21655 let count: Simd = count.as_u64x8();
21656 let no_overflow: u64x8 = simd_lt(x:count, y:u64x8::splat(u64::BITS as u64));
21657 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i64x8::splat(63));
21658 simd_shr(lhs:a.as_i64x8(), rhs:count).as_m512i()
21659 }
21660}
21661
21662/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21663///
21664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21665#[inline]
21666#[target_feature(enable = "avx512f")]
21667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21668#[cfg_attr(test, assert_instr(vpsravq))]
21669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21670pub const fn _mm512_mask_srav_epi64(
21671 src: __m512i,
21672 k: __mmask8,
21673 a: __m512i,
21674 count: __m512i,
21675) -> __m512i {
21676 unsafe {
21677 let shf: Simd = _mm512_srav_epi64(a, count).as_i64x8();
21678 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
21679 }
21680}
21681
21682/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21683///
21684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21685#[inline]
21686#[target_feature(enable = "avx512f")]
21687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21688#[cfg_attr(test, assert_instr(vpsravq))]
21689#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21690pub const fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21691 unsafe {
21692 let shf: Simd = _mm512_srav_epi64(a, count).as_i64x8();
21693 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
21694 }
21695}
21696
21697/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21698///
21699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21700#[inline]
21701#[target_feature(enable = "avx512f,avx512vl")]
21702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21703#[cfg_attr(test, assert_instr(vpsravq))]
21704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21705pub const fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21706 unsafe {
21707 let count: Simd = count.as_u64x4();
21708 let no_overflow: u64x4 = simd_lt(x:count, y:u64x4::splat(u64::BITS as u64));
21709 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i64x4::splat(63));
21710 simd_shr(lhs:a.as_i64x4(), rhs:count).as_m256i()
21711 }
21712}
21713
21714/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21715///
21716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21717#[inline]
21718#[target_feature(enable = "avx512f,avx512vl")]
21719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21720#[cfg_attr(test, assert_instr(vpsravq))]
21721#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21722pub const fn _mm256_mask_srav_epi64(
21723 src: __m256i,
21724 k: __mmask8,
21725 a: __m256i,
21726 count: __m256i,
21727) -> __m256i {
21728 unsafe {
21729 let shf: Simd = _mm256_srav_epi64(a, count).as_i64x4();
21730 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
21731 }
21732}
21733
21734/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21735///
21736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21737#[inline]
21738#[target_feature(enable = "avx512f,avx512vl")]
21739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21740#[cfg_attr(test, assert_instr(vpsravq))]
21741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21742pub const fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21743 unsafe {
21744 let shf: Simd = _mm256_srav_epi64(a, count).as_i64x4();
21745 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
21746 }
21747}
21748
21749/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21750///
21751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21752#[inline]
21753#[target_feature(enable = "avx512f,avx512vl")]
21754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21755#[cfg_attr(test, assert_instr(vpsravq))]
21756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21757pub const fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21758 unsafe {
21759 let count: Simd = count.as_u64x2();
21760 let no_overflow: u64x2 = simd_lt(x:count, y:u64x2::splat(u64::BITS as u64));
21761 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i64x2::splat(63));
21762 simd_shr(lhs:a.as_i64x2(), rhs:count).as_m128i()
21763 }
21764}
21765
21766/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21769#[inline]
21770#[target_feature(enable = "avx512f,avx512vl")]
21771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21772#[cfg_attr(test, assert_instr(vpsravq))]
21773#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21774pub const fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21775 unsafe {
21776 let shf: Simd = _mm_srav_epi64(a, count).as_i64x2();
21777 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21778 }
21779}
21780
21781/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21782///
21783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21784#[inline]
21785#[target_feature(enable = "avx512f,avx512vl")]
21786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21787#[cfg_attr(test, assert_instr(vpsravq))]
21788#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21789pub const fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21790 unsafe {
21791 let shf: Simd = _mm_srav_epi64(a, count).as_i64x2();
21792 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21793 }
21794}
21795
21796/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21797///
21798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
21799#[inline]
21800#[target_feature(enable = "avx512f")]
21801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21802#[cfg_attr(test, assert_instr(vprolvd))]
21803#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21804pub const fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21805 unsafe {
21806 transmute(src:simd_funnel_shl(
21807 a.as_u32x16(),
21808 b:a.as_u32x16(),
21809 shift:simd_and(x:b.as_u32x16(), y:u32x16::splat(31)),
21810 ))
21811 }
21812}
21813
21814/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21815///
21816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21817#[inline]
21818#[target_feature(enable = "avx512f")]
21819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21820#[cfg_attr(test, assert_instr(vprolvd))]
21821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21822pub const fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21823 unsafe {
21824 let rol: Simd = _mm512_rolv_epi32(a, b).as_i32x16();
21825 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x16()))
21826 }
21827}
21828
21829/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21830///
21831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21832#[inline]
21833#[target_feature(enable = "avx512f")]
21834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21835#[cfg_attr(test, assert_instr(vprolvd))]
21836#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21837pub const fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21838 unsafe {
21839 let rol: Simd = _mm512_rolv_epi32(a, b).as_i32x16();
21840 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i32x16::ZERO))
21841 }
21842}
21843
21844/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21845///
21846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21847#[inline]
21848#[target_feature(enable = "avx512f,avx512vl")]
21849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21850#[cfg_attr(test, assert_instr(vprolvd))]
21851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21852pub const fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
21853 unsafe {
21854 transmute(src:simd_funnel_shl(
21855 a.as_u32x8(),
21856 b:a.as_u32x8(),
21857 shift:simd_and(x:b.as_u32x8(), y:u32x8::splat(31)),
21858 ))
21859 }
21860}
21861
21862/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21863///
21864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21865#[inline]
21866#[target_feature(enable = "avx512f,avx512vl")]
21867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21868#[cfg_attr(test, assert_instr(vprolvd))]
21869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21870pub const fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21871 unsafe {
21872 let rol: Simd = _mm256_rolv_epi32(a, b).as_i32x8();
21873 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x8()))
21874 }
21875}
21876
21877/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21878///
21879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21880#[inline]
21881#[target_feature(enable = "avx512f,avx512vl")]
21882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21883#[cfg_attr(test, assert_instr(vprolvd))]
21884#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21885pub const fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21886 unsafe {
21887 let rol: Simd = _mm256_rolv_epi32(a, b).as_i32x8();
21888 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i32x8::ZERO))
21889 }
21890}
21891
21892/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21893///
21894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21895#[inline]
21896#[target_feature(enable = "avx512f,avx512vl")]
21897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21898#[cfg_attr(test, assert_instr(vprolvd))]
21899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21900pub const fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
21901 unsafe {
21902 transmute(src:simd_funnel_shl(
21903 a.as_u32x4(),
21904 b:a.as_u32x4(),
21905 shift:simd_and(x:b.as_u32x4(), y:u32x4::splat(31)),
21906 ))
21907 }
21908}
21909
21910/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21911///
21912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21913#[inline]
21914#[target_feature(enable = "avx512f,avx512vl")]
21915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21916#[cfg_attr(test, assert_instr(vprolvd))]
21917#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21918pub const fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21919 unsafe {
21920 let rol: Simd = _mm_rolv_epi32(a, b).as_i32x4();
21921 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x4()))
21922 }
21923}
21924
21925/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21926///
21927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21928#[inline]
21929#[target_feature(enable = "avx512f,avx512vl")]
21930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21931#[cfg_attr(test, assert_instr(vprolvd))]
21932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21933pub const fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21934 unsafe {
21935 let rol: Simd = _mm_rolv_epi32(a, b).as_i32x4();
21936 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i32x4::ZERO))
21937 }
21938}
21939
21940/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21941///
21942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
21943#[inline]
21944#[target_feature(enable = "avx512f")]
21945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21946#[cfg_attr(test, assert_instr(vprorvd))]
21947#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21948pub const fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
21949 unsafe {
21950 transmute(src:simd_funnel_shr(
21951 a.as_u32x16(),
21952 b:a.as_u32x16(),
21953 shift:simd_and(x:b.as_u32x16(), y:u32x16::splat(31)),
21954 ))
21955 }
21956}
21957
21958/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21959///
21960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21961#[inline]
21962#[target_feature(enable = "avx512f")]
21963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21964#[cfg_attr(test, assert_instr(vprorvd))]
21965#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21966pub const fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21967 unsafe {
21968 let ror: Simd = _mm512_rorv_epi32(a, b).as_i32x16();
21969 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x16()))
21970 }
21971}
21972
21973/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21974///
21975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21976#[inline]
21977#[target_feature(enable = "avx512f")]
21978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21979#[cfg_attr(test, assert_instr(vprorvd))]
21980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21981pub const fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21982 unsafe {
21983 let ror: Simd = _mm512_rorv_epi32(a, b).as_i32x16();
21984 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i32x16::ZERO))
21985 }
21986}
21987
21988/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21994#[cfg_attr(test, assert_instr(vprorvd))]
21995#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21996pub const fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21997 unsafe {
21998 transmute(src:simd_funnel_shr(
21999 a.as_u32x8(),
22000 b:a.as_u32x8(),
22001 shift:simd_and(x:b.as_u32x8(), y:u32x8::splat(31)),
22002 ))
22003 }
22004}
22005
22006/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22007///
22008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
22009#[inline]
22010#[target_feature(enable = "avx512f,avx512vl")]
22011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22012#[cfg_attr(test, assert_instr(vprorvd))]
22013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22014pub const fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22015 unsafe {
22016 let ror: Simd = _mm256_rorv_epi32(a, b).as_i32x8();
22017 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x8()))
22018 }
22019}
22020
22021/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22022///
22023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
22024#[inline]
22025#[target_feature(enable = "avx512f,avx512vl")]
22026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22027#[cfg_attr(test, assert_instr(vprorvd))]
22028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22029pub const fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22030 unsafe {
22031 let ror: Simd = _mm256_rorv_epi32(a, b).as_i32x8();
22032 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i32x8::ZERO))
22033 }
22034}
22035
22036/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22037///
22038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
22039#[inline]
22040#[target_feature(enable = "avx512f,avx512vl")]
22041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22042#[cfg_attr(test, assert_instr(vprorvd))]
22043#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22044pub const fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
22045 unsafe {
22046 transmute(src:simd_funnel_shr(
22047 a.as_u32x4(),
22048 b:a.as_u32x4(),
22049 shift:simd_and(x:b.as_u32x4(), y:u32x4::splat(31)),
22050 ))
22051 }
22052}
22053
22054/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22055///
22056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
22057#[inline]
22058#[target_feature(enable = "avx512f,avx512vl")]
22059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22060#[cfg_attr(test, assert_instr(vprorvd))]
22061#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22062pub const fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22063 unsafe {
22064 let ror: Simd = _mm_rorv_epi32(a, b).as_i32x4();
22065 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x4()))
22066 }
22067}
22068
22069/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22070///
22071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
22072#[inline]
22073#[target_feature(enable = "avx512f,avx512vl")]
22074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22075#[cfg_attr(test, assert_instr(vprorvd))]
22076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22077pub const fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22078 unsafe {
22079 let ror: Simd = _mm_rorv_epi32(a, b).as_i32x4();
22080 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i32x4::ZERO))
22081 }
22082}
22083
22084/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
22085///
22086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
22087#[inline]
22088#[target_feature(enable = "avx512f")]
22089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22090#[cfg_attr(test, assert_instr(vprolvq))]
22091#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22092pub const fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
22093 unsafe {
22094 transmute(src:simd_funnel_shl(
22095 a.as_u64x8(),
22096 b:a.as_u64x8(),
22097 shift:simd_and(x:b.as_u64x8(), y:u64x8::splat(63)),
22098 ))
22099 }
22100}
22101
22102/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22103///
22104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
22105#[inline]
22106#[target_feature(enable = "avx512f")]
22107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22108#[cfg_attr(test, assert_instr(vprolvq))]
22109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22110pub const fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22111 unsafe {
22112 let rol: Simd = _mm512_rolv_epi64(a, b).as_i64x8();
22113 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x8()))
22114 }
22115}
22116
22117/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22118///
22119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
22120#[inline]
22121#[target_feature(enable = "avx512f")]
22122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22123#[cfg_attr(test, assert_instr(vprolvq))]
22124#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22125pub const fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22126 unsafe {
22127 let rol: Simd = _mm512_rolv_epi64(a, b).as_i64x8();
22128 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i64x8::ZERO))
22129 }
22130}
22131
22132/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
22133///
22134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
22135#[inline]
22136#[target_feature(enable = "avx512f,avx512vl")]
22137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22138#[cfg_attr(test, assert_instr(vprolvq))]
22139#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22140pub const fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
22141 unsafe {
22142 transmute(src:simd_funnel_shl(
22143 a.as_u64x4(),
22144 b:a.as_u64x4(),
22145 shift:simd_and(x:b.as_u64x4(), y:u64x4::splat(63)),
22146 ))
22147 }
22148}
22149
22150/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22151///
22152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
22153#[inline]
22154#[target_feature(enable = "avx512f,avx512vl")]
22155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22156#[cfg_attr(test, assert_instr(vprolvq))]
22157#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22158pub const fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22159 unsafe {
22160 let rol: Simd = _mm256_rolv_epi64(a, b).as_i64x4();
22161 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x4()))
22162 }
22163}
22164
22165/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22166///
22167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
22168#[inline]
22169#[target_feature(enable = "avx512f,avx512vl")]
22170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22171#[cfg_attr(test, assert_instr(vprolvq))]
22172#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22173pub const fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22174 unsafe {
22175 let rol: Simd = _mm256_rolv_epi64(a, b).as_i64x4();
22176 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i64x4::ZERO))
22177 }
22178}
22179
22180/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
22181///
22182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
22183#[inline]
22184#[target_feature(enable = "avx512f,avx512vl")]
22185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22186#[cfg_attr(test, assert_instr(vprolvq))]
22187#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22188pub const fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
22189 unsafe {
22190 transmute(src:simd_funnel_shl(
22191 a.as_u64x2(),
22192 b:a.as_u64x2(),
22193 shift:simd_and(x:b.as_u64x2(), y:u64x2::splat(63)),
22194 ))
22195 }
22196}
22197
22198/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22199///
22200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
22201#[inline]
22202#[target_feature(enable = "avx512f,avx512vl")]
22203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22204#[cfg_attr(test, assert_instr(vprolvq))]
22205#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22206pub const fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22207 unsafe {
22208 let rol: Simd = _mm_rolv_epi64(a, b).as_i64x2();
22209 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x2()))
22210 }
22211}
22212
22213/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22214///
22215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
22216#[inline]
22217#[target_feature(enable = "avx512f,avx512vl")]
22218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22219#[cfg_attr(test, assert_instr(vprolvq))]
22220#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22221pub const fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22222 unsafe {
22223 let rol: Simd = _mm_rolv_epi64(a, b).as_i64x2();
22224 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i64x2::ZERO))
22225 }
22226}
22227
22228/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22229///
22230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
22231#[inline]
22232#[target_feature(enable = "avx512f")]
22233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22234#[cfg_attr(test, assert_instr(vprorvq))]
22235#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22236pub const fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
22237 unsafe {
22238 transmute(src:simd_funnel_shr(
22239 a.as_u64x8(),
22240 b:a.as_u64x8(),
22241 shift:simd_and(x:b.as_u64x8(), y:u64x8::splat(63)),
22242 ))
22243 }
22244}
22245
22246/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22247///
22248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
22249#[inline]
22250#[target_feature(enable = "avx512f")]
22251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22252#[cfg_attr(test, assert_instr(vprorvq))]
22253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22254pub const fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22255 unsafe {
22256 let ror: Simd = _mm512_rorv_epi64(a, b).as_i64x8();
22257 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x8()))
22258 }
22259}
22260
22261/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22262///
22263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
22264#[inline]
22265#[target_feature(enable = "avx512f")]
22266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22267#[cfg_attr(test, assert_instr(vprorvq))]
22268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22269pub const fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22270 unsafe {
22271 let ror: Simd = _mm512_rorv_epi64(a, b).as_i64x8();
22272 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i64x8::ZERO))
22273 }
22274}
22275
22276/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22277///
22278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
22279#[inline]
22280#[target_feature(enable = "avx512f,avx512vl")]
22281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22282#[cfg_attr(test, assert_instr(vprorvq))]
22283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22284pub const fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
22285 unsafe {
22286 transmute(src:simd_funnel_shr(
22287 a.as_u64x4(),
22288 b:a.as_u64x4(),
22289 shift:simd_and(x:b.as_u64x4(), y:u64x4::splat(63)),
22290 ))
22291 }
22292}
22293
22294/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22295///
22296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
22297#[inline]
22298#[target_feature(enable = "avx512f,avx512vl")]
22299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22300#[cfg_attr(test, assert_instr(vprorvq))]
22301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22302pub const fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22303 unsafe {
22304 let ror: Simd = _mm256_rorv_epi64(a, b).as_i64x4();
22305 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x4()))
22306 }
22307}
22308
22309/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22310///
22311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
22312#[inline]
22313#[target_feature(enable = "avx512f,avx512vl")]
22314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22315#[cfg_attr(test, assert_instr(vprorvq))]
22316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22317pub const fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22318 unsafe {
22319 let ror: Simd = _mm256_rorv_epi64(a, b).as_i64x4();
22320 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i64x4::ZERO))
22321 }
22322}
22323
22324/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22325///
22326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
22327#[inline]
22328#[target_feature(enable = "avx512f,avx512vl")]
22329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22330#[cfg_attr(test, assert_instr(vprorvq))]
22331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22332pub const fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
22333 unsafe {
22334 transmute(src:simd_funnel_shr(
22335 a.as_u64x2(),
22336 b:a.as_u64x2(),
22337 shift:simd_and(x:b.as_u64x2(), y:u64x2::splat(63)),
22338 ))
22339 }
22340}
22341
22342/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22343///
22344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
22345#[inline]
22346#[target_feature(enable = "avx512f,avx512vl")]
22347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22348#[cfg_attr(test, assert_instr(vprorvq))]
22349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22350pub const fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22351 unsafe {
22352 let ror: Simd = _mm_rorv_epi64(a, b).as_i64x2();
22353 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x2()))
22354 }
22355}
22356
22357/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22358///
22359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
22360#[inline]
22361#[target_feature(enable = "avx512f,avx512vl")]
22362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22363#[cfg_attr(test, assert_instr(vprorvq))]
22364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22365pub const fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22366 unsafe {
22367 let ror: Simd = _mm_rorv_epi64(a, b).as_i64x2();
22368 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i64x2::ZERO))
22369 }
22370}
22371
22372/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22373///
22374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
22375#[inline]
22376#[target_feature(enable = "avx512f")]
22377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22378#[cfg_attr(test, assert_instr(vpsllvd))]
22379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22380pub const fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
22381 unsafe {
22382 let count: Simd = count.as_u32x16();
22383 let no_overflow: u32x16 = simd_lt(x:count, y:u32x16::splat(u32::BITS));
22384 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u32x16::ZERO);
22385 simd_select(mask:no_overflow, if_true:simd_shl(a.as_u32x16(), count), if_false:u32x16::ZERO).as_m512i()
22386 }
22387}
22388
22389/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22390///
22391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
22392#[inline]
22393#[target_feature(enable = "avx512f")]
22394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22395#[cfg_attr(test, assert_instr(vpsllvd))]
22396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22397pub const fn _mm512_mask_sllv_epi32(
22398 src: __m512i,
22399 k: __mmask16,
22400 a: __m512i,
22401 count: __m512i,
22402) -> __m512i {
22403 unsafe {
22404 let shf: Simd = _mm512_sllv_epi32(a, count).as_i32x16();
22405 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
22406 }
22407}
22408
22409/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22410///
22411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
22412#[inline]
22413#[target_feature(enable = "avx512f")]
22414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22415#[cfg_attr(test, assert_instr(vpsllvd))]
22416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22417pub const fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
22418 unsafe {
22419 let shf: Simd = _mm512_sllv_epi32(a, count).as_i32x16();
22420 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
22421 }
22422}
22423
22424/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22425///
22426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
22427#[inline]
22428#[target_feature(enable = "avx512f,avx512vl")]
22429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22430#[cfg_attr(test, assert_instr(vpsllvd))]
22431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22432pub const fn _mm256_mask_sllv_epi32(
22433 src: __m256i,
22434 k: __mmask8,
22435 a: __m256i,
22436 count: __m256i,
22437) -> __m256i {
22438 unsafe {
22439 let shf: Simd = _mm256_sllv_epi32(a, count).as_i32x8();
22440 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
22441 }
22442}
22443
22444/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22445///
22446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
22447#[inline]
22448#[target_feature(enable = "avx512f,avx512vl")]
22449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22450#[cfg_attr(test, assert_instr(vpsllvd))]
22451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22452pub const fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22453 unsafe {
22454 let shf: Simd = _mm256_sllv_epi32(a, count).as_i32x8();
22455 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
22456 }
22457}
22458
22459/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22460///
22461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
22462#[inline]
22463#[target_feature(enable = "avx512f,avx512vl")]
22464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22465#[cfg_attr(test, assert_instr(vpsllvd))]
22466#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22467pub const fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22468 unsafe {
22469 let shf: Simd = _mm_sllv_epi32(a, count).as_i32x4();
22470 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
22471 }
22472}
22473
22474/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22475///
22476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
22477#[inline]
22478#[target_feature(enable = "avx512f,avx512vl")]
22479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22480#[cfg_attr(test, assert_instr(vpsllvd))]
22481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22482pub const fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22483 unsafe {
22484 let shf: Simd = _mm_sllv_epi32(a, count).as_i32x4();
22485 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
22486 }
22487}
22488
22489/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22490///
22491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
22492#[inline]
22493#[target_feature(enable = "avx512f")]
22494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22495#[cfg_attr(test, assert_instr(vpsrlvd))]
22496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22497pub const fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
22498 unsafe {
22499 let count: Simd = count.as_u32x16();
22500 let no_overflow: u32x16 = simd_lt(x:count, y:u32x16::splat(u32::BITS));
22501 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u32x16::ZERO);
22502 simd_select(mask:no_overflow, if_true:simd_shr(a.as_u32x16(), count), if_false:u32x16::ZERO).as_m512i()
22503 }
22504}
22505
22506/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22507///
22508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
22509#[inline]
22510#[target_feature(enable = "avx512f")]
22511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22512#[cfg_attr(test, assert_instr(vpsrlvd))]
22513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22514pub const fn _mm512_mask_srlv_epi32(
22515 src: __m512i,
22516 k: __mmask16,
22517 a: __m512i,
22518 count: __m512i,
22519) -> __m512i {
22520 unsafe {
22521 let shf: Simd = _mm512_srlv_epi32(a, count).as_i32x16();
22522 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
22523 }
22524}
22525
22526/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22527///
22528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
22529#[inline]
22530#[target_feature(enable = "avx512f")]
22531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22532#[cfg_attr(test, assert_instr(vpsrlvd))]
22533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22534pub const fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
22535 unsafe {
22536 let shf: Simd = _mm512_srlv_epi32(a, count).as_i32x16();
22537 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
22538 }
22539}
22540
22541/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22542///
22543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
22544#[inline]
22545#[target_feature(enable = "avx512f,avx512vl")]
22546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22547#[cfg_attr(test, assert_instr(vpsrlvd))]
22548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22549pub const fn _mm256_mask_srlv_epi32(
22550 src: __m256i,
22551 k: __mmask8,
22552 a: __m256i,
22553 count: __m256i,
22554) -> __m256i {
22555 unsafe {
22556 let shf: Simd = _mm256_srlv_epi32(a, count).as_i32x8();
22557 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
22558 }
22559}
22560
22561/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22562///
22563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
22564#[inline]
22565#[target_feature(enable = "avx512f,avx512vl")]
22566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22567#[cfg_attr(test, assert_instr(vpsrlvd))]
22568#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22569pub const fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22570 unsafe {
22571 let shf: Simd = _mm256_srlv_epi32(a, count).as_i32x8();
22572 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
22573 }
22574}
22575
22576/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22577///
22578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
22579#[inline]
22580#[target_feature(enable = "avx512f,avx512vl")]
22581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22582#[cfg_attr(test, assert_instr(vpsrlvd))]
22583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22584pub const fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22585 unsafe {
22586 let shf: Simd = _mm_srlv_epi32(a, count).as_i32x4();
22587 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
22588 }
22589}
22590
22591/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22592///
22593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
22594#[inline]
22595#[target_feature(enable = "avx512f,avx512vl")]
22596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22597#[cfg_attr(test, assert_instr(vpsrlvd))]
22598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22599pub const fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22600 unsafe {
22601 let shf: Simd = _mm_srlv_epi32(a, count).as_i32x4();
22602 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
22603 }
22604}
22605
22606/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22607///
22608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
22609#[inline]
22610#[target_feature(enable = "avx512f")]
22611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22612#[cfg_attr(test, assert_instr(vpsllvq))]
22613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22614pub const fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
22615 unsafe {
22616 let count: Simd = count.as_u64x8();
22617 let no_overflow: u64x8 = simd_lt(x:count, y:u64x8::splat(u64::BITS as u64));
22618 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u64x8::ZERO);
22619 simd_select(mask:no_overflow, if_true:simd_shl(a.as_u64x8(), count), if_false:u64x8::ZERO).as_m512i()
22620 }
22621}
22622
22623/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22624///
22625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
22626#[inline]
22627#[target_feature(enable = "avx512f")]
22628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22629#[cfg_attr(test, assert_instr(vpsllvq))]
22630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22631pub const fn _mm512_mask_sllv_epi64(
22632 src: __m512i,
22633 k: __mmask8,
22634 a: __m512i,
22635 count: __m512i,
22636) -> __m512i {
22637 unsafe {
22638 let shf: Simd = _mm512_sllv_epi64(a, count).as_i64x8();
22639 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
22640 }
22641}
22642
22643/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22644///
22645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
22646#[inline]
22647#[target_feature(enable = "avx512f")]
22648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22649#[cfg_attr(test, assert_instr(vpsllvq))]
22650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22651pub const fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22652 unsafe {
22653 let shf: Simd = _mm512_sllv_epi64(a, count).as_i64x8();
22654 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
22655 }
22656}
22657
22658/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22659///
22660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
22661#[inline]
22662#[target_feature(enable = "avx512f,avx512vl")]
22663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22664#[cfg_attr(test, assert_instr(vpsllvq))]
22665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22666pub const fn _mm256_mask_sllv_epi64(
22667 src: __m256i,
22668 k: __mmask8,
22669 a: __m256i,
22670 count: __m256i,
22671) -> __m256i {
22672 unsafe {
22673 let shf: Simd = _mm256_sllv_epi64(a, count).as_i64x4();
22674 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
22675 }
22676}
22677
22678/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22679///
22680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22681#[inline]
22682#[target_feature(enable = "avx512f,avx512vl")]
22683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22684#[cfg_attr(test, assert_instr(vpsllvq))]
22685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22686pub const fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22687 unsafe {
22688 let shf: Simd = _mm256_sllv_epi64(a, count).as_i64x4();
22689 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
22690 }
22691}
22692
22693/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22694///
22695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22696#[inline]
22697#[target_feature(enable = "avx512f,avx512vl")]
22698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22699#[cfg_attr(test, assert_instr(vpsllvq))]
22700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22701pub const fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22702 unsafe {
22703 let shf: Simd = _mm_sllv_epi64(a, count).as_i64x2();
22704 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
22705 }
22706}
22707
22708/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22709///
22710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22711#[inline]
22712#[target_feature(enable = "avx512f,avx512vl")]
22713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22714#[cfg_attr(test, assert_instr(vpsllvq))]
22715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22716pub const fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22717 unsafe {
22718 let shf: Simd = _mm_sllv_epi64(a, count).as_i64x2();
22719 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
22720 }
22721}
22722
22723/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22724///
22725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
22726#[inline]
22727#[target_feature(enable = "avx512f")]
22728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22729#[cfg_attr(test, assert_instr(vpsrlvq))]
22730#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22731pub const fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
22732 unsafe {
22733 let count: Simd = count.as_u64x8();
22734 let no_overflow: u64x8 = simd_lt(x:count, y:u64x8::splat(u64::BITS as u64));
22735 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u64x8::ZERO);
22736 simd_select(mask:no_overflow, if_true:simd_shr(a.as_u64x8(), count), if_false:u64x8::ZERO).as_m512i()
22737 }
22738}
22739
22740/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22743#[inline]
22744#[target_feature(enable = "avx512f")]
22745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22746#[cfg_attr(test, assert_instr(vpsrlvq))]
22747#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22748pub const fn _mm512_mask_srlv_epi64(
22749 src: __m512i,
22750 k: __mmask8,
22751 a: __m512i,
22752 count: __m512i,
22753) -> __m512i {
22754 unsafe {
22755 let shf: Simd = _mm512_srlv_epi64(a, count).as_i64x8();
22756 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
22757 }
22758}
22759
22760/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22761///
22762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22763#[inline]
22764#[target_feature(enable = "avx512f")]
22765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22766#[cfg_attr(test, assert_instr(vpsrlvq))]
22767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22768pub const fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22769 unsafe {
22770 let shf: Simd = _mm512_srlv_epi64(a, count).as_i64x8();
22771 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
22772 }
22773}
22774
22775/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22778#[inline]
22779#[target_feature(enable = "avx512f,avx512vl")]
22780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22781#[cfg_attr(test, assert_instr(vpsrlvq))]
22782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22783pub const fn _mm256_mask_srlv_epi64(
22784 src: __m256i,
22785 k: __mmask8,
22786 a: __m256i,
22787 count: __m256i,
22788) -> __m256i {
22789 unsafe {
22790 let shf: Simd = _mm256_srlv_epi64(a, count).as_i64x4();
22791 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
22792 }
22793}
22794
22795/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22796///
22797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22798#[inline]
22799#[target_feature(enable = "avx512f,avx512vl")]
22800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22801#[cfg_attr(test, assert_instr(vpsrlvq))]
22802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22803pub const fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22804 unsafe {
22805 let shf: Simd = _mm256_srlv_epi64(a, count).as_i64x4();
22806 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
22807 }
22808}
22809
22810/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22811///
22812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22813#[inline]
22814#[target_feature(enable = "avx512f,avx512vl")]
22815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22816#[cfg_attr(test, assert_instr(vpsrlvq))]
22817#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22818pub const fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22819 unsafe {
22820 let shf: Simd = _mm_srlv_epi64(a, count).as_i64x2();
22821 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
22822 }
22823}
22824
22825/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22826///
22827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22828#[inline]
22829#[target_feature(enable = "avx512f,avx512vl")]
22830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22831#[cfg_attr(test, assert_instr(vpsrlvq))]
22832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22833pub const fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22834 unsafe {
22835 let shf: Simd = _mm_srlv_epi64(a, count).as_i64x2();
22836 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
22837 }
22838}
22839
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK selects one of the 4 f32 elements inside a
        // 128-bit lane; the +4/+8/+12 offsets replay the same selection for
        // the three upper lanes, so the shuffle never crosses lane boundaries.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}
22876
22877/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22878///
22879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22880#[inline]
22881#[target_feature(enable = "avx512f")]
22882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22883#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22884#[rustc_legacy_const_generics(3)]
22885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22886pub const fn _mm512_mask_permute_ps<const MASK: i32>(
22887 src: __m512,
22888 k: __mmask16,
22889 a: __m512,
22890) -> __m512 {
22891 unsafe {
22892 static_assert_uimm_bits!(MASK, 8);
22893 let r: __m512 = _mm512_permute_ps::<MASK>(a);
22894 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
22895 }
22896}
22897
22898/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22899///
22900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22901#[inline]
22902#[target_feature(enable = "avx512f")]
22903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22904#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22905#[rustc_legacy_const_generics(2)]
22906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22907pub const fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22908 unsafe {
22909 static_assert_uimm_bits!(MASK, 8);
22910 let r: __m512 = _mm512_permute_ps::<MASK>(a);
22911 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
22912 }
22913}
22914
22915/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22916///
22917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22918#[inline]
22919#[target_feature(enable = "avx512f,avx512vl")]
22920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22921#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22922#[rustc_legacy_const_generics(3)]
22923#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22924pub const fn _mm256_mask_permute_ps<const MASK: i32>(
22925 src: __m256,
22926 k: __mmask8,
22927 a: __m256,
22928) -> __m256 {
22929 unsafe {
22930 let r: __m256 = _mm256_permute_ps::<MASK>(a);
22931 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
22932 }
22933}
22934
22935/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22936///
22937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22938#[inline]
22939#[target_feature(enable = "avx512f,avx512vl")]
22940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22941#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22942#[rustc_legacy_const_generics(2)]
22943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22944pub const fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22945 unsafe {
22946 let r: __m256 = _mm256_permute_ps::<MASK>(a);
22947 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
22948 }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22954#[inline]
22955#[target_feature(enable = "avx512f,avx512vl")]
22956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22957#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22958#[rustc_legacy_const_generics(3)]
22959#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22960pub const fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22961 unsafe {
22962 let r: __m128 = _mm_permute_ps::<MASK>(a);
22963 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
22964 }
22965}
22966
22967/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22970#[inline]
22971#[target_feature(enable = "avx512f,avx512vl")]
22972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22973#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22974#[rustc_legacy_const_generics(2)]
22975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22976pub const fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22977 unsafe {
22978 let r: __m128 = _mm_permute_ps::<MASK>(a);
22979 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
22980 }
22981}
22982
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each bit of MASK selects element 0 or 1 within a 128-bit lane; two
        // bits per lane, with +2/+4/+6 offsets addressing the three upper
        // lanes, so the shuffle never crosses lane boundaries.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1),
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 2,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 4,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 6,
            ],
        )
    }
}
23011
23012/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23013///
23014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
23015#[inline]
23016#[target_feature(enable = "avx512f")]
23017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23018#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23019#[rustc_legacy_const_generics(3)]
23020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23021pub const fn _mm512_mask_permute_pd<const MASK: i32>(
23022 src: __m512d,
23023 k: __mmask8,
23024 a: __m512d,
23025) -> __m512d {
23026 unsafe {
23027 static_assert_uimm_bits!(MASK, 8);
23028 let r: __m512d = _mm512_permute_pd::<MASK>(a);
23029 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
23030 }
23031}
23032
23033/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23034///
23035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
23036#[inline]
23037#[target_feature(enable = "avx512f")]
23038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23039#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23040#[rustc_legacy_const_generics(2)]
23041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23042pub const fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23043 unsafe {
23044 static_assert_uimm_bits!(MASK, 8);
23045 let r: __m512d = _mm512_permute_pd::<MASK>(a);
23046 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
23047 }
23048}
23049
23050/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
23053#[inline]
23054#[target_feature(enable = "avx512f,avx512vl")]
23055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23056#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23057#[rustc_legacy_const_generics(3)]
23058#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23059pub const fn _mm256_mask_permute_pd<const MASK: i32>(
23060 src: __m256d,
23061 k: __mmask8,
23062 a: __m256d,
23063) -> __m256d {
23064 unsafe {
23065 static_assert_uimm_bits!(MASK, 4);
23066 let r: __m256d = _mm256_permute_pd::<MASK>(a);
23067 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
23068 }
23069}
23070
23071/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23072///
23073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
23074#[inline]
23075#[target_feature(enable = "avx512f,avx512vl")]
23076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23077#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23078#[rustc_legacy_const_generics(2)]
23079#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23080pub const fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23081 unsafe {
23082 static_assert_uimm_bits!(MASK, 4);
23083 let r: __m256d = _mm256_permute_pd::<MASK>(a);
23084 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
23085 }
23086}
23087
23088/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23089///
23090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
23091#[inline]
23092#[target_feature(enable = "avx512f,avx512vl")]
23093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23094#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23095#[rustc_legacy_const_generics(3)]
23096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23097pub const fn _mm_mask_permute_pd<const IMM2: i32>(
23098 src: __m128d,
23099 k: __mmask8,
23100 a: __m128d,
23101) -> __m128d {
23102 unsafe {
23103 static_assert_uimm_bits!(IMM2, 2);
23104 let r: __m128d = _mm_permute_pd::<IMM2>(a);
23105 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:src.as_f64x2()))
23106 }
23107}
23108
23109/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23110///
23111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
23112#[inline]
23113#[target_feature(enable = "avx512f,avx512vl")]
23114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23115#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23116#[rustc_legacy_const_generics(2)]
23117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23118pub const fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
23119 unsafe {
23120 static_assert_uimm_bits!(IMM2, 2);
23121 let r: __m128d = _mm_permute_pd::<IMM2>(a);
23122 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:f64x2::ZERO))
23123 }
23124}
23125
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 i64 elements within a
        // 256-bit lane; the +4 offsets replay the same selection in the upper
        // 256-bit lane.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23154
23155/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23156///
23157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
23158#[inline]
23159#[target_feature(enable = "avx512f")]
23160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23161#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23162#[rustc_legacy_const_generics(3)]
23163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23164pub const fn _mm512_mask_permutex_epi64<const MASK: i32>(
23165 src: __m512i,
23166 k: __mmask8,
23167 a: __m512i,
23168) -> __m512i {
23169 unsafe {
23170 static_assert_uimm_bits!(MASK, 8);
23171 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
23172 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
23173 }
23174}
23175
23176/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23177///
23178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
23179#[inline]
23180#[target_feature(enable = "avx512f")]
23181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23182#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23183#[rustc_legacy_const_generics(2)]
23184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23185pub const fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
23186 unsafe {
23187 static_assert_uimm_bits!(MASK, 8);
23188 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
23189 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
23190 }
23191}
23192
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 i64 elements of the
        // single 256-bit vector.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
            ],
        )
    }
}
23217
23218/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23219///
23220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
23221#[inline]
23222#[target_feature(enable = "avx512f,avx512vl")]
23223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23224#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23225#[rustc_legacy_const_generics(3)]
23226#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23227pub const fn _mm256_mask_permutex_epi64<const MASK: i32>(
23228 src: __m256i,
23229 k: __mmask8,
23230 a: __m256i,
23231) -> __m256i {
23232 unsafe {
23233 static_assert_uimm_bits!(MASK, 8);
23234 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
23235 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
23236 }
23237}
23238
23239/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23240///
23241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
23242#[inline]
23243#[target_feature(enable = "avx512f,avx512vl")]
23244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23245#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23246#[rustc_legacy_const_generics(2)]
23247#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23248pub const fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
23249 unsafe {
23250 static_assert_uimm_bits!(MASK, 8);
23251 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
23252 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
23253 }
23254}
23255
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 f64 elements within a
        // 256-bit lane; the +4 offsets replay the same selection in the upper
        // 256-bit lane.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23284
23285/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23286///
23287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
23288#[inline]
23289#[target_feature(enable = "avx512f")]
23290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23291#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23292#[rustc_legacy_const_generics(3)]
23293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23294pub const fn _mm512_mask_permutex_pd<const MASK: i32>(
23295 src: __m512d,
23296 k: __mmask8,
23297 a: __m512d,
23298) -> __m512d {
23299 unsafe {
23300 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
23301 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
23302 }
23303}
23304
23305/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23306///
23307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
23308#[inline]
23309#[target_feature(enable = "avx512f")]
23310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23311#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23312#[rustc_legacy_const_generics(2)]
23313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23314pub const fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23315 unsafe {
23316 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
23317 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
23318 }
23319}
23320
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 f64 elements of the
        // single 256-bit vector.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
            ],
        )
    }
}
23345
23346/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23347///
23348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
23349#[inline]
23350#[target_feature(enable = "avx512f,avx512vl")]
23351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23352#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23353#[rustc_legacy_const_generics(3)]
23354#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23355pub const fn _mm256_mask_permutex_pd<const MASK: i32>(
23356 src: __m256d,
23357 k: __mmask8,
23358 a: __m256d,
23359) -> __m256d {
23360 unsafe {
23361 static_assert_uimm_bits!(MASK, 8);
23362 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
23363 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
23364 }
23365}
23366
23367/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23368///
23369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
23370#[inline]
23371#[target_feature(enable = "avx512f,avx512vl")]
23372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23373#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23374#[rustc_legacy_const_generics(2)]
23375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23376pub const fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23377 unsafe {
23378 static_assert_uimm_bits!(MASK, 8);
23379 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
23380 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
23381 }
23382}
23383
23384/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
23385///
23386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
23387#[inline]
23388#[target_feature(enable = "avx512f")]
23389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23390#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23391pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
23392 unsafe { transmute(src:vpermd(a.as_i32x16(), idx.as_i32x16())) }
23393}
23394
23395/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
23396///
23397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
23398#[inline]
23399#[target_feature(enable = "avx512f")]
23400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23401#[cfg_attr(test, assert_instr(vpermd))]
23402pub fn _mm512_mask_permutevar_epi32(
23403 src: __m512i,
23404 k: __mmask16,
23405 idx: __m512i,
23406 a: __m512i,
23407) -> __m512i {
23408 unsafe {
23409 let permute: Simd = _mm512_permutevar_epi32(idx, a).as_i32x16();
23410 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i32x16()))
23411 }
23412}
23413
23414/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
23415///
23416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
23417#[inline]
23418#[target_feature(enable = "avx512f")]
23419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23420#[cfg_attr(test, assert_instr(vpermilps))]
23421pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
23422 unsafe { transmute(src:vpermilps(a.as_f32x16(), b.as_i32x16())) }
23423}
23424
23425/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23426///
23427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
23428#[inline]
23429#[target_feature(enable = "avx512f")]
23430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23431#[cfg_attr(test, assert_instr(vpermilps))]
23432pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23433 unsafe {
23434 let permute: Simd = _mm512_permutevar_ps(a, b).as_f32x16();
23435 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x16()))
23436 }
23437}
23438
23439/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23440///
23441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
23442#[inline]
23443#[target_feature(enable = "avx512f")]
23444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23445#[cfg_attr(test, assert_instr(vpermilps))]
23446pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23447 unsafe {
23448 let permute: Simd = _mm512_permutevar_ps(a, b).as_f32x16();
23449 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x16::ZERO))
23450 }
23451}
23452
23453/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23454///
23455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195)
23456#[inline]
23457#[target_feature(enable = "avx512f,avx512vl")]
23458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23459#[cfg_attr(test, assert_instr(vpermilps))]
23460pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23461 unsafe {
23462 let permute: Simd = _mm256_permutevar_ps(a, b).as_f32x8();
23463 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x8()))
23464 }
23465}
23466
23467/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23468///
23469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
23470#[inline]
23471#[target_feature(enable = "avx512f,avx512vl")]
23472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23473#[cfg_attr(test, assert_instr(vpermilps))]
23474pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23475 unsafe {
23476 let permute: Simd = _mm256_permutevar_ps(a, b).as_f32x8();
23477 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x8::ZERO))
23478 }
23479}
23480
23481/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23482///
23483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
23484#[inline]
23485#[target_feature(enable = "avx512f,avx512vl")]
23486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23487#[cfg_attr(test, assert_instr(vpermilps))]
23488pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23489 unsafe {
23490 let permute: Simd = _mm_permutevar_ps(a, b).as_f32x4();
23491 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x4()))
23492 }
23493}
23494
23495/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23496///
23497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
23498#[inline]
23499#[target_feature(enable = "avx512f,avx512vl")]
23500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23501#[cfg_attr(test, assert_instr(vpermilps))]
23502pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23503 unsafe {
23504 let permute: Simd = _mm_permutevar_ps(a, b).as_f32x4();
23505 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x4::ZERO))
23506 }
23507}
23508
23509/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
23510///
23511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
23512#[inline]
23513#[target_feature(enable = "avx512f")]
23514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23515#[cfg_attr(test, assert_instr(vpermilpd))]
23516pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
23517 unsafe { transmute(src:vpermilpd(a.as_f64x8(), b.as_i64x8())) }
23518}
23519
23520/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23521///
23522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
23523#[inline]
23524#[target_feature(enable = "avx512f")]
23525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23526#[cfg_attr(test, assert_instr(vpermilpd))]
23527pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23528 unsafe {
23529 let permute: Simd = _mm512_permutevar_pd(a, b).as_f64x8();
23530 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x8()))
23531 }
23532}
23533
23534/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23535///
23536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
23537#[inline]
23538#[target_feature(enable = "avx512f")]
23539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23540#[cfg_attr(test, assert_instr(vpermilpd))]
23541pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23542 unsafe {
23543 let permute: Simd = _mm512_permutevar_pd(a, b).as_f64x8();
23544 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x8::ZERO))
23545 }
23546}
23547
23548/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23549///
23550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
23551#[inline]
23552#[target_feature(enable = "avx512f,avx512vl")]
23553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23554#[cfg_attr(test, assert_instr(vpermilpd))]
23555pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23556 unsafe {
23557 let permute: Simd = _mm256_permutevar_pd(a, b).as_f64x4();
23558 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x4()))
23559 }
23560}
23561
23562/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23563///
23564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
23565#[inline]
23566#[target_feature(enable = "avx512f,avx512vl")]
23567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23568#[cfg_attr(test, assert_instr(vpermilpd))]
23569pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23570 unsafe {
23571 let permute: Simd = _mm256_permutevar_pd(a, b).as_f64x4();
23572 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x4::ZERO))
23573 }
23574}
23575
23576/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23577///
23578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
23579#[inline]
23580#[target_feature(enable = "avx512f,avx512vl")]
23581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23582#[cfg_attr(test, assert_instr(vpermilpd))]
23583pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23584 unsafe {
23585 let permute: Simd = _mm_permutevar_pd(a, b).as_f64x2();
23586 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x2()))
23587 }
23588}
23589
23590/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23591///
23592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
23593#[inline]
23594#[target_feature(enable = "avx512f,avx512vl")]
23595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23596#[cfg_attr(test, assert_instr(vpermilpd))]
23597pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23598 unsafe {
23599 let permute: Simd = _mm_permutevar_pd(a, b).as_f64x2();
23600 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x2::ZERO))
23601 }
23602}
23603
23604/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23605///
23606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
23607#[inline]
23608#[target_feature(enable = "avx512f")]
23609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23610#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23611pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
23612 unsafe { transmute(src:vpermd(a.as_i32x16(), idx.as_i32x16())) }
23613}
23614
23615/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23616///
23617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
23618#[inline]
23619#[target_feature(enable = "avx512f")]
23620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23621#[cfg_attr(test, assert_instr(vpermd))]
23622pub fn _mm512_mask_permutexvar_epi32(
23623 src: __m512i,
23624 k: __mmask16,
23625 idx: __m512i,
23626 a: __m512i,
23627) -> __m512i {
23628 unsafe {
23629 let permute: Simd = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23630 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i32x16()))
23631 }
23632}
23633
23634/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23635///
23636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
23637#[inline]
23638#[target_feature(enable = "avx512f")]
23639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23640#[cfg_attr(test, assert_instr(vpermd))]
23641pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
23642 unsafe {
23643 let permute: Simd = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23644 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x16::ZERO))
23645 }
23646}
23647
23648/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23649///
23650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
23651#[inline]
23652#[target_feature(enable = "avx512f,avx512vl")]
23653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23654#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23655pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
23656 _mm256_permutevar8x32_epi32(a, b:idx) // llvm use llvm.x86.avx2.permd
23657}
23658
23659/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23660///
23661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
23662#[inline]
23663#[target_feature(enable = "avx512f,avx512vl")]
23664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23665#[cfg_attr(test, assert_instr(vpermd))]
23666pub fn _mm256_mask_permutexvar_epi32(
23667 src: __m256i,
23668 k: __mmask8,
23669 idx: __m256i,
23670 a: __m256i,
23671) -> __m256i {
23672 unsafe {
23673 let permute: Simd = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23674 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i32x8()))
23675 }
23676}
23677
23678/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23684#[cfg_attr(test, assert_instr(vpermd))]
23685pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23686 unsafe {
23687 let permute: Simd = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23688 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x8::ZERO))
23689 }
23690}
23691
23692/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23693///
23694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
23695#[inline]
23696#[target_feature(enable = "avx512f")]
23697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23698#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
23699pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
23700 unsafe { transmute(src:vpermq(a.as_i64x8(), idx.as_i64x8())) }
23701}
23702
23703/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23704///
23705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
23706#[inline]
23707#[target_feature(enable = "avx512f")]
23708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23709#[cfg_attr(test, assert_instr(vpermq))]
23710pub fn _mm512_mask_permutexvar_epi64(
23711 src: __m512i,
23712 k: __mmask8,
23713 idx: __m512i,
23714 a: __m512i,
23715) -> __m512i {
23716 unsafe {
23717 let permute: Simd = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23718 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i64x8()))
23719 }
23720}
23721
23722/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23723///
23724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
23725#[inline]
23726#[target_feature(enable = "avx512f")]
23727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23728#[cfg_attr(test, assert_instr(vpermq))]
23729pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
23730 unsafe {
23731 let permute: Simd = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23732 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x8::ZERO))
23733 }
23734}
23735
23736/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23737///
23738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
23739#[inline]
23740#[target_feature(enable = "avx512f,avx512vl")]
23741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23742#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
23743pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
23744 unsafe { transmute(src:vpermq256(a.as_i64x4(), idx.as_i64x4())) }
23745}
23746
23747/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23748///
23749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
23750#[inline]
23751#[target_feature(enable = "avx512f,avx512vl")]
23752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23753#[cfg_attr(test, assert_instr(vpermq))]
23754pub fn _mm256_mask_permutexvar_epi64(
23755 src: __m256i,
23756 k: __mmask8,
23757 idx: __m256i,
23758 a: __m256i,
23759) -> __m256i {
23760 unsafe {
23761 let permute: Simd = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23762 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i64x4()))
23763 }
23764}
23765
23766/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23772#[cfg_attr(test, assert_instr(vpermq))]
23773pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23774 unsafe {
23775 let permute: Simd = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23776 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x4::ZERO))
23777 }
23778}
23779
23780/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
23783#[inline]
23784#[target_feature(enable = "avx512f")]
23785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23786#[cfg_attr(test, assert_instr(vpermps))]
23787pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
23788 unsafe { transmute(src:vpermps(a.as_f32x16(), idx.as_i32x16())) }
23789}
23790
23791/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23792///
23793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23794#[inline]
23795#[target_feature(enable = "avx512f")]
23796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23797#[cfg_attr(test, assert_instr(vpermps))]
23798pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23799 unsafe {
23800 let permute: Simd = _mm512_permutexvar_ps(idx, a).as_f32x16();
23801 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x16()))
23802 }
23803}
23804
23805/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23806///
23807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23808#[inline]
23809#[target_feature(enable = "avx512f")]
23810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23811#[cfg_attr(test, assert_instr(vpermps))]
23812pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23813 unsafe {
23814 let permute: Simd = _mm512_permutexvar_ps(idx, a).as_f32x16();
23815 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x16::ZERO))
23816 }
23817}
23818
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
    // Same operation as the AVX2 cross-lane shuffle; lowered via llvm.x86.avx2.permps.
    _mm256_permutevar8x32_ps(a, idx)
}
23829
23830/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23831///
23832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23833#[inline]
23834#[target_feature(enable = "avx512f,avx512vl")]
23835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23836#[cfg_attr(test, assert_instr(vpermps))]
23837pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23838 unsafe {
23839 let permute: Simd = _mm256_permutexvar_ps(idx, a).as_f32x8();
23840 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x8()))
23841 }
23842}
23843
23844/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23845///
23846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23847#[inline]
23848#[target_feature(enable = "avx512f,avx512vl")]
23849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23850#[cfg_attr(test, assert_instr(vpermps))]
23851pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23852 unsafe {
23853 let permute: Simd = _mm256_permutexvar_ps(idx, a).as_f32x8();
23854 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x8::ZERO))
23855 }
23856}
23857
23858/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23859///
23860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23861#[inline]
23862#[target_feature(enable = "avx512f")]
23863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23864#[cfg_attr(test, assert_instr(vpermpd))]
23865pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23866 unsafe { transmute(src:vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23867}
23868
23869/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23870///
23871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23872#[inline]
23873#[target_feature(enable = "avx512f")]
23874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23875#[cfg_attr(test, assert_instr(vpermpd))]
23876pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23877 unsafe {
23878 let permute: Simd = _mm512_permutexvar_pd(idx, a).as_f64x8();
23879 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x8()))
23880 }
23881}
23882
23883/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23889#[cfg_attr(test, assert_instr(vpermpd))]
23890pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23891 unsafe {
23892 let permute: Simd = _mm512_permutexvar_pd(idx, a).as_f64x8();
23893 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x8::ZERO))
23894 }
23895}
23896
23897/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23898///
23899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23900#[inline]
23901#[target_feature(enable = "avx512f,avx512vl")]
23902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23903#[cfg_attr(test, assert_instr(vpermpd))]
23904pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23905 unsafe { transmute(src:vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23906}
23907
23908/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23909///
23910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23911#[inline]
23912#[target_feature(enable = "avx512f,avx512vl")]
23913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23914#[cfg_attr(test, assert_instr(vpermpd))]
23915pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23916 unsafe {
23917 let permute: Simd = _mm256_permutexvar_pd(idx, a).as_f64x4();
23918 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x4()))
23919 }
23920}
23921
23922/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23923///
23924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23925#[inline]
23926#[target_feature(enable = "avx512f,avx512vl")]
23927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23928#[cfg_attr(test, assert_instr(vpermpd))]
23929pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23930 unsafe {
23931 let permute: Simd = _mm256_permutexvar_pd(idx, a).as_f64x4();
23932 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x4::ZERO))
23933 }
23934}
23935
23936/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23937///
23938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23939#[inline]
23940#[target_feature(enable = "avx512f")]
23941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23942#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23943pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23944 unsafe { transmute(src:vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23945}
23946
23947/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23948///
23949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23950#[inline]
23951#[target_feature(enable = "avx512f")]
23952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23953#[cfg_attr(test, assert_instr(vpermt2d))]
23954pub fn _mm512_mask_permutex2var_epi32(
23955 a: __m512i,
23956 k: __mmask16,
23957 idx: __m512i,
23958 b: __m512i,
23959) -> __m512i {
23960 unsafe {
23961 let permute: Simd = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23962 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i32x16()))
23963 }
23964}
23965
23966/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23967///
23968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23969#[inline]
23970#[target_feature(enable = "avx512f")]
23971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23972#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23973pub fn _mm512_maskz_permutex2var_epi32(
23974 k: __mmask16,
23975 a: __m512i,
23976 idx: __m512i,
23977 b: __m512i,
23978) -> __m512i {
23979 unsafe {
23980 let permute: Simd = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23981 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x16::ZERO))
23982 }
23983}
23984
23985/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23986///
23987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23988#[inline]
23989#[target_feature(enable = "avx512f")]
23990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23991#[cfg_attr(test, assert_instr(vpermi2d))]
23992pub fn _mm512_mask2_permutex2var_epi32(
23993 a: __m512i,
23994 idx: __m512i,
23995 k: __mmask16,
23996 b: __m512i,
23997) -> __m512i {
23998 unsafe {
23999 let permute: Simd = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
24000 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i32x16()))
24001 }
24002}
24003
24004/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24005///
24006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
24007#[inline]
24008#[target_feature(enable = "avx512f,avx512vl")]
24009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24010#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24011pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
24012 unsafe { transmute(src:vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
24013}
24014
24015/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24016///
24017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
24018#[inline]
24019#[target_feature(enable = "avx512f,avx512vl")]
24020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24021#[cfg_attr(test, assert_instr(vpermt2d))]
24022pub fn _mm256_mask_permutex2var_epi32(
24023 a: __m256i,
24024 k: __mmask8,
24025 idx: __m256i,
24026 b: __m256i,
24027) -> __m256i {
24028 unsafe {
24029 let permute: Simd = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24030 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i32x8()))
24031 }
24032}
24033
24034/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24035///
24036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
24037#[inline]
24038#[target_feature(enable = "avx512f,avx512vl")]
24039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24040#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24041pub fn _mm256_maskz_permutex2var_epi32(
24042 k: __mmask8,
24043 a: __m256i,
24044 idx: __m256i,
24045 b: __m256i,
24046) -> __m256i {
24047 unsafe {
24048 let permute: Simd = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24049 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x8::ZERO))
24050 }
24051}
24052
24053/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24054///
24055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
24056#[inline]
24057#[target_feature(enable = "avx512f,avx512vl")]
24058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24059#[cfg_attr(test, assert_instr(vpermi2d))]
24060pub fn _mm256_mask2_permutex2var_epi32(
24061 a: __m256i,
24062 idx: __m256i,
24063 k: __mmask8,
24064 b: __m256i,
24065) -> __m256i {
24066 unsafe {
24067 let permute: Simd = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24068 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i32x8()))
24069 }
24070}
24071
24072/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24073///
24074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
24075#[inline]
24076#[target_feature(enable = "avx512f,avx512vl")]
24077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24078#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24079pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24080 unsafe { transmute(src:vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
24081}
24082
24083/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24084///
24085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
24086#[inline]
24087#[target_feature(enable = "avx512f,avx512vl")]
24088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24089#[cfg_attr(test, assert_instr(vpermt2d))]
24090pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24091 unsafe {
24092 let permute: Simd = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24093 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i32x4()))
24094 }
24095}
24096
24097/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24098///
24099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
24100#[inline]
24101#[target_feature(enable = "avx512f,avx512vl")]
24102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24103#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24104pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24105 unsafe {
24106 let permute: Simd = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24107 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x4::ZERO))
24108 }
24109}
24110
24111/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24112///
24113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
24114#[inline]
24115#[target_feature(enable = "avx512f,avx512vl")]
24116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24117#[cfg_attr(test, assert_instr(vpermi2d))]
24118pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24119 unsafe {
24120 let permute: Simd = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24121 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i32x4()))
24122 }
24123}
24124
24125/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24126///
24127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
24128#[inline]
24129#[target_feature(enable = "avx512f")]
24130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24131#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24132pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
24133 unsafe { transmute(src:vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
24134}
24135
24136/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24137///
24138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
24139#[inline]
24140#[target_feature(enable = "avx512f")]
24141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24142#[cfg_attr(test, assert_instr(vpermt2q))]
24143pub fn _mm512_mask_permutex2var_epi64(
24144 a: __m512i,
24145 k: __mmask8,
24146 idx: __m512i,
24147 b: __m512i,
24148) -> __m512i {
24149 unsafe {
24150 let permute: Simd = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24151 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i64x8()))
24152 }
24153}
24154
24155/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24156///
24157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
24158#[inline]
24159#[target_feature(enable = "avx512f")]
24160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24161#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24162pub fn _mm512_maskz_permutex2var_epi64(
24163 k: __mmask8,
24164 a: __m512i,
24165 idx: __m512i,
24166 b: __m512i,
24167) -> __m512i {
24168 unsafe {
24169 let permute: Simd = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24170 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x8::ZERO))
24171 }
24172}
24173
24174/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24175///
24176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
24177#[inline]
24178#[target_feature(enable = "avx512f")]
24179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24180#[cfg_attr(test, assert_instr(vpermi2q))]
24181pub fn _mm512_mask2_permutex2var_epi64(
24182 a: __m512i,
24183 idx: __m512i,
24184 k: __mmask8,
24185 b: __m512i,
24186) -> __m512i {
24187 unsafe {
24188 let permute: Simd = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24189 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i64x8()))
24190 }
24191}
24192
24193/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24194///
24195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
24196#[inline]
24197#[target_feature(enable = "avx512f,avx512vl")]
24198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24199#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24200pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
24201 unsafe { transmute(src:vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
24202}
24203
24204/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24205///
24206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
24207#[inline]
24208#[target_feature(enable = "avx512f,avx512vl")]
24209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24210#[cfg_attr(test, assert_instr(vpermt2q))]
24211pub fn _mm256_mask_permutex2var_epi64(
24212 a: __m256i,
24213 k: __mmask8,
24214 idx: __m256i,
24215 b: __m256i,
24216) -> __m256i {
24217 unsafe {
24218 let permute: Simd = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24219 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i64x4()))
24220 }
24221}
24222
24223/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24224///
24225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
24226#[inline]
24227#[target_feature(enable = "avx512f,avx512vl")]
24228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24229#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24230pub fn _mm256_maskz_permutex2var_epi64(
24231 k: __mmask8,
24232 a: __m256i,
24233 idx: __m256i,
24234 b: __m256i,
24235) -> __m256i {
24236 unsafe {
24237 let permute: Simd = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24238 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x4::ZERO))
24239 }
24240}
24241
24242/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24243///
24244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
24245#[inline]
24246#[target_feature(enable = "avx512f,avx512vl")]
24247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24248#[cfg_attr(test, assert_instr(vpermi2q))]
24249pub fn _mm256_mask2_permutex2var_epi64(
24250 a: __m256i,
24251 idx: __m256i,
24252 k: __mmask8,
24253 b: __m256i,
24254) -> __m256i {
24255 unsafe {
24256 let permute: Simd = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24257 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i64x4()))
24258 }
24259}
24260
24261/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24267#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24268pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24269 unsafe { transmute(src:vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
24270}
24271
24272/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24273///
24274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
24275#[inline]
24276#[target_feature(enable = "avx512f,avx512vl")]
24277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24278#[cfg_attr(test, assert_instr(vpermt2q))]
24279pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24280 unsafe {
24281 let permute: Simd = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24282 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i64x2()))
24283 }
24284}
24285
24286/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24287///
24288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
24289#[inline]
24290#[target_feature(enable = "avx512f,avx512vl")]
24291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24292#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24293pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24294 unsafe {
24295 let permute: Simd = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24296 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x2::ZERO))
24297 }
24298}
24299
24300/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
24303#[inline]
24304#[target_feature(enable = "avx512f,avx512vl")]
24305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24306#[cfg_attr(test, assert_instr(vpermi2q))]
24307pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24308 unsafe {
24309 let permute: Simd = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24310 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i64x2()))
24311 }
24312}
24313
24314/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24320#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24321pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
24322 unsafe { transmute(src:vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
24323}
24324
24325/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24326///
24327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
24328#[inline]
24329#[target_feature(enable = "avx512f")]
24330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24331#[cfg_attr(test, assert_instr(vpermt2ps))]
24332pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
24333 unsafe {
24334 let permute: Simd = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24335 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f32x16()))
24336 }
24337}
24338
24339/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24340///
24341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
24342#[inline]
24343#[target_feature(enable = "avx512f")]
24344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24345#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24346pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
24347 unsafe {
24348 let permute: Simd = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24349 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x16::ZERO))
24350 }
24351}
24352
24353/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24354///
24355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
24356#[inline]
24357#[target_feature(enable = "avx512f")]
24358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24359#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24360pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
24361 unsafe {
24362 let permute: Simd = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24363 let idx: Simd = _mm512_castsi512_ps(idx).as_f32x16();
24364 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24365 }
24366}
24367
24368/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24369///
24370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
24371#[inline]
24372#[target_feature(enable = "avx512f,avx512vl")]
24373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24374#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24375pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
24376 unsafe { transmute(src:vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
24377}
24378
24379/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24380///
24381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
24382#[inline]
24383#[target_feature(enable = "avx512f,avx512vl")]
24384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24385#[cfg_attr(test, assert_instr(vpermt2ps))]
24386pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
24387 unsafe {
24388 let permute: Simd = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24389 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f32x8()))
24390 }
24391}
24392
24393/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24394///
24395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
24396#[inline]
24397#[target_feature(enable = "avx512f,avx512vl")]
24398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24399#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24400pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
24401 unsafe {
24402 let permute: Simd = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24403 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x8::ZERO))
24404 }
24405}
24406
24407/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24408///
24409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
24410#[inline]
24411#[target_feature(enable = "avx512f,avx512vl")]
24412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24413#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24414pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
24415 unsafe {
24416 let permute: Simd = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24417 let idx: Simd = _mm256_castsi256_ps(idx).as_f32x8();
24418 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24419 }
24420}
24421
24422/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24423///
24424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
24425#[inline]
24426#[target_feature(enable = "avx512f,avx512vl")]
24427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24428#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24429pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
24430 unsafe { transmute(src:vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
24431}
24432
24433/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24434///
24435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
24436#[inline]
24437#[target_feature(enable = "avx512f,avx512vl")]
24438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24439#[cfg_attr(test, assert_instr(vpermt2ps))]
24440pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
24441 unsafe {
24442 let permute: Simd = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24443 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f32x4()))
24444 }
24445}
24446
24447/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24448///
24449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
24450#[inline]
24451#[target_feature(enable = "avx512f,avx512vl")]
24452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24453#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24454pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
24455 unsafe {
24456 let permute: Simd = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24457 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x4::ZERO))
24458 }
24459}
24460
24461/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24462///
24463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
24464#[inline]
24465#[target_feature(enable = "avx512f,avx512vl")]
24466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24467#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24468pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
24469 unsafe {
24470 let permute: Simd = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24471 let idx: Simd = _mm_castsi128_ps(idx).as_f32x4();
24472 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24473 }
24474}
24475
24476/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24477///
24478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
24479#[inline]
24480#[target_feature(enable = "avx512f")]
24481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24482#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24483pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
24484 unsafe { transmute(src:vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
24485}
24486
24487/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24488///
24489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
24490#[inline]
24491#[target_feature(enable = "avx512f")]
24492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24493#[cfg_attr(test, assert_instr(vpermt2pd))]
24494pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
24495 unsafe {
24496 let permute: Simd = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24497 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f64x8()))
24498 }
24499}
24500
24501/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24502///
24503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
24504#[inline]
24505#[target_feature(enable = "avx512f")]
24506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24507#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24508pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
24509 unsafe {
24510 let permute: Simd = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24511 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x8::ZERO))
24512 }
24513}
24514
24515/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
24516///
24517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
24518#[inline]
24519#[target_feature(enable = "avx512f")]
24520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24521#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24522pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
24523 unsafe {
24524 let permute: Simd = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24525 let idx: Simd = _mm512_castsi512_pd(idx).as_f64x8();
24526 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24527 }
24528}
24529
24530/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24531///
24532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
24533#[inline]
24534#[target_feature(enable = "avx512f,avx512vl")]
24535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24536#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24537pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
24538 unsafe { transmute(src:vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
24539}
24540
24541/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24542///
24543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
24544#[inline]
24545#[target_feature(enable = "avx512f,avx512vl")]
24546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24547#[cfg_attr(test, assert_instr(vpermt2pd))]
24548pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
24549 unsafe {
24550 let permute: Simd = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24551 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f64x4()))
24552 }
24553}
24554
24555/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24556///
24557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
24558#[inline]
24559#[target_feature(enable = "avx512f,avx512vl")]
24560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24561#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24562pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
24563 unsafe {
24564 let permute: Simd = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24565 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x4::ZERO))
24566 }
24567}
24568
24569/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
24570///
24571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
24572#[inline]
24573#[target_feature(enable = "avx512f,avx512vl")]
24574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24575#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24576pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
24577 unsafe {
24578 let permute: Simd = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24579 let idx: Simd = _mm256_castsi256_pd(idx).as_f64x4();
24580 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24581 }
24582}
24583
24584/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24585///
24586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
24587#[inline]
24588#[target_feature(enable = "avx512f,avx512vl")]
24589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24590#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24591pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
24592 unsafe { transmute(src:vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
24593}
24594
24595/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24596///
24597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
24598#[inline]
24599#[target_feature(enable = "avx512f,avx512vl")]
24600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24601#[cfg_attr(test, assert_instr(vpermt2pd))]
24602pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
24603 unsafe {
24604 let permute: Simd = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24605 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f64x2()))
24606 }
24607}
24608
24609/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24610///
24611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
24612#[inline]
24613#[target_feature(enable = "avx512f,avx512vl")]
24614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24615#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24616pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
24617 unsafe {
24618 let permute: Simd = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24619 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x2::ZERO))
24620 }
24621}
24622
24623/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
24624///
24625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
24626#[inline]
24627#[target_feature(enable = "avx512f,avx512vl")]
24628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24629#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24630pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
24631 unsafe {
24632 let permute: Simd = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24633 let idx: Simd = _mm_castsi128_pd(idx).as_f64x2();
24634 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24635 }
24636}
24637
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK selects one of the four 32-bit elements of a
        // 128-bit lane; the same pattern is repeated for all four lanes, hence
        // the +4/+8/+12 offsets on the shuffle indices below.
        let r: i32x16 = simd_shuffle!(
            a.as_i32x16(),
            a.as_i32x16(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        );
        transmute(r)
    }
}
24675
24676/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24677///
24678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
24679#[inline]
24680#[target_feature(enable = "avx512f")]
24681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24682#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24683#[rustc_legacy_const_generics(3)]
24684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24685pub const fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24686 src: __m512i,
24687 k: __mmask16,
24688 a: __m512i,
24689) -> __m512i {
24690 unsafe {
24691 static_assert_uimm_bits!(MASK, 8);
24692 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
24693 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
24694 }
24695}
24696
24697/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24698///
24699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
24700#[inline]
24701#[target_feature(enable = "avx512f")]
24702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24703#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24704#[rustc_legacy_const_generics(2)]
24705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24706pub const fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24707 k: __mmask16,
24708 a: __m512i,
24709) -> __m512i {
24710 unsafe {
24711 static_assert_uimm_bits!(MASK, 8);
24712 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
24713 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
24714 }
24715}
24716
24717/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24718///
24719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
24720#[inline]
24721#[target_feature(enable = "avx512f,avx512vl")]
24722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24723#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24724#[rustc_legacy_const_generics(3)]
24725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24726pub const fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24727 src: __m256i,
24728 k: __mmask8,
24729 a: __m256i,
24730) -> __m256i {
24731 unsafe {
24732 static_assert_uimm_bits!(MASK, 8);
24733 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
24734 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
24735 }
24736}
24737
24738/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24739///
24740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
24741#[inline]
24742#[target_feature(enable = "avx512f,avx512vl")]
24743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24744#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24745#[rustc_legacy_const_generics(2)]
24746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24747pub const fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24748 k: __mmask8,
24749 a: __m256i,
24750) -> __m256i {
24751 unsafe {
24752 static_assert_uimm_bits!(MASK, 8);
24753 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
24754 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
24755 }
24756}
24757
24758/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24759///
24760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
24761#[inline]
24762#[target_feature(enable = "avx512f,avx512vl")]
24763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24764#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24765#[rustc_legacy_const_generics(3)]
24766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24767pub const fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24768 src: __m128i,
24769 k: __mmask8,
24770 a: __m128i,
24771) -> __m128i {
24772 unsafe {
24773 static_assert_uimm_bits!(MASK, 8);
24774 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
24775 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
24776 }
24777}
24778
24779/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24780///
24781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
24782#[inline]
24783#[target_feature(enable = "avx512f,avx512vl")]
24784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24785#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24786#[rustc_legacy_const_generics(2)]
24787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24788pub const fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24789 k: __mmask8,
24790 a: __m128i,
24791) -> __m128i {
24792 unsafe {
24793 static_assert_uimm_bits!(MASK, 8);
24794 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
24795 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
24796 }
24797}
24798
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Per 128-bit lane: bits [1:0] and [3:2] of MASK pick two elements from
        // the lane of `a`, bits [5:4] and [7:6] pick two from the matching lane
        // of `b` (shuffle indices >= 16 refer to `b`); +4/+8/+12 step the lanes.
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 16,
                ((MASK as u32 >> 6) & 0b11) + 16,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 20,
                ((MASK as u32 >> 6) & 0b11) + 20,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 24,
                ((MASK as u32 >> 6) & 0b11) + 24,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 28,
                ((MASK as u32 >> 6) & 0b11) + 28,
            ],
        )
    }
}
24835
24836/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24837///
24838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24839#[inline]
24840#[target_feature(enable = "avx512f")]
24841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24842#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24843#[rustc_legacy_const_generics(4)]
24844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24845pub const fn _mm512_mask_shuffle_ps<const MASK: i32>(
24846 src: __m512,
24847 k: __mmask16,
24848 a: __m512,
24849 b: __m512,
24850) -> __m512 {
24851 unsafe {
24852 static_assert_uimm_bits!(MASK, 8);
24853 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
24854 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
24855 }
24856}
24857
24858/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24859///
24860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24861#[inline]
24862#[target_feature(enable = "avx512f")]
24863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24864#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24865#[rustc_legacy_const_generics(3)]
24866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24867pub const fn _mm512_maskz_shuffle_ps<const MASK: i32>(
24868 k: __mmask16,
24869 a: __m512,
24870 b: __m512,
24871) -> __m512 {
24872 unsafe {
24873 static_assert_uimm_bits!(MASK, 8);
24874 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
24875 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
24876 }
24877}
24878
24879/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24880///
24881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24882#[inline]
24883#[target_feature(enable = "avx512f,avx512vl")]
24884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24885#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24886#[rustc_legacy_const_generics(4)]
24887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24888pub const fn _mm256_mask_shuffle_ps<const MASK: i32>(
24889 src: __m256,
24890 k: __mmask8,
24891 a: __m256,
24892 b: __m256,
24893) -> __m256 {
24894 unsafe {
24895 static_assert_uimm_bits!(MASK, 8);
24896 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
24897 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
24898 }
24899}
24900
24901/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24902///
24903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24904#[inline]
24905#[target_feature(enable = "avx512f,avx512vl")]
24906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24907#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24908#[rustc_legacy_const_generics(3)]
24909#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24910pub const fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24911 unsafe {
24912 static_assert_uimm_bits!(MASK, 8);
24913 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
24914 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
24915 }
24916}
24917
24918/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24919///
24920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24921#[inline]
24922#[target_feature(enable = "avx512f,avx512vl")]
24923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24924#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24925#[rustc_legacy_const_generics(4)]
24926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24927pub const fn _mm_mask_shuffle_ps<const MASK: i32>(
24928 src: __m128,
24929 k: __mmask8,
24930 a: __m128,
24931 b: __m128,
24932) -> __m128 {
24933 unsafe {
24934 static_assert_uimm_bits!(MASK, 8);
24935 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
24936 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
24937 }
24938}
24939
24940/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24941///
24942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24943#[inline]
24944#[target_feature(enable = "avx512f,avx512vl")]
24945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24946#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24947#[rustc_legacy_const_generics(3)]
24948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24949pub const fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24950 unsafe {
24951 static_assert_uimm_bits!(MASK, 8);
24952 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
24953 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
24954 }
24955}
24956
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // One MASK bit per output element: even result slots take element 0 or 1
        // of the 128-bit lane of `a`, odd slots take element 0 or 1 of the lane
        // of `b` (shuffle indices >= 8 refer to `b`).
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1) + 8,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 10,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 12,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 14,
            ],
        )
    }
}
24985
24986/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24987///
24988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24989#[inline]
24990#[target_feature(enable = "avx512f")]
24991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24992#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24993#[rustc_legacy_const_generics(4)]
24994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24995pub const fn _mm512_mask_shuffle_pd<const MASK: i32>(
24996 src: __m512d,
24997 k: __mmask8,
24998 a: __m512d,
24999 b: __m512d,
25000) -> __m512d {
25001 unsafe {
25002 static_assert_uimm_bits!(MASK, 8);
25003 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
25004 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
25005 }
25006}
25007
25008/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25009///
25010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
25011#[inline]
25012#[target_feature(enable = "avx512f")]
25013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25014#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25015#[rustc_legacy_const_generics(3)]
25016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25017pub const fn _mm512_maskz_shuffle_pd<const MASK: i32>(
25018 k: __mmask8,
25019 a: __m512d,
25020 b: __m512d,
25021) -> __m512d {
25022 unsafe {
25023 static_assert_uimm_bits!(MASK, 8);
25024 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
25025 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
25026 }
25027}
25028
25029/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25030///
25031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
25032#[inline]
25033#[target_feature(enable = "avx512f,avx512vl")]
25034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25035#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25036#[rustc_legacy_const_generics(4)]
25037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25038pub const fn _mm256_mask_shuffle_pd<const MASK: i32>(
25039 src: __m256d,
25040 k: __mmask8,
25041 a: __m256d,
25042 b: __m256d,
25043) -> __m256d {
25044 unsafe {
25045 static_assert_uimm_bits!(MASK, 8);
25046 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
25047 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
25048 }
25049}
25050
25051/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25052///
25053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
25054#[inline]
25055#[target_feature(enable = "avx512f,avx512vl")]
25056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25057#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25058#[rustc_legacy_const_generics(3)]
25059#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25060pub const fn _mm256_maskz_shuffle_pd<const MASK: i32>(
25061 k: __mmask8,
25062 a: __m256d,
25063 b: __m256d,
25064) -> __m256d {
25065 unsafe {
25066 static_assert_uimm_bits!(MASK, 8);
25067 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
25068 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
25069 }
25070}
25071
25072/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25073///
25074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
25075#[inline]
25076#[target_feature(enable = "avx512f,avx512vl")]
25077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25078#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25079#[rustc_legacy_const_generics(4)]
25080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25081pub const fn _mm_mask_shuffle_pd<const MASK: i32>(
25082 src: __m128d,
25083 k: __mmask8,
25084 a: __m128d,
25085 b: __m128d,
25086) -> __m128d {
25087 unsafe {
25088 static_assert_uimm_bits!(MASK, 8);
25089 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
25090 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:src.as_f64x2()))
25091 }
25092}
25093
25094/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25095///
25096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
25097#[inline]
25098#[target_feature(enable = "avx512f,avx512vl")]
25099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25100#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25101#[rustc_legacy_const_generics(3)]
25102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25103pub const fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25104 unsafe {
25105 static_assert_uimm_bits!(MASK, 8);
25106 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
25107 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:f64x2::ZERO))
25108 }
25109}
25110
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // MASK bits [1:0] and [3:2] each select a 128-bit lane (4 consecutive
        // i32s) from `a`; bits [5:4] and [7:6] select a lane from `b` (shuffle
        // indices >= 16 refer to `b`).
        let r: i32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25150
25151/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25152///
25153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
25154#[inline]
25155#[target_feature(enable = "avx512f")]
25156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25157#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25158#[rustc_legacy_const_generics(4)]
25159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25160pub const fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
25161 src: __m512i,
25162 k: __mmask16,
25163 a: __m512i,
25164 b: __m512i,
25165) -> __m512i {
25166 unsafe {
25167 static_assert_uimm_bits!(MASK, 8);
25168 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
25169 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
25170 }
25171}
25172
25173/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25174///
25175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
25176#[inline]
25177#[target_feature(enable = "avx512f")]
25178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25179#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25180#[rustc_legacy_const_generics(3)]
25181#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25182pub const fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
25183 k: __mmask16,
25184 a: __m512i,
25185 b: __m512i,
25186) -> __m512i {
25187 unsafe {
25188 static_assert_uimm_bits!(MASK, 8);
25189 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
25190 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
25191 }
25192}
25193
25194/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
25195///
25196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
25197#[inline]
25198#[target_feature(enable = "avx512f,avx512vl")]
25199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25200#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
25201#[rustc_legacy_const_generics(2)]
25202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25203pub const fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
25204 unsafe {
25205 static_assert_uimm_bits!(MASK, 8);
25206 let a: Simd = a.as_i32x8();
25207 let b: Simd = b.as_i32x8();
25208 let r: i32x8 = simd_shuffle!(
25209 a,
25210 b,
25211 [
25212 (MASK as u32 & 0b1) * 4 + 0,
25213 (MASK as u32 & 0b1) * 4 + 1,
25214 (MASK as u32 & 0b1) * 4 + 2,
25215 (MASK as u32 & 0b1) * 4 + 3,
25216 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
25217 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
25218 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
25219 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
25220 ],
25221 );
25222 transmute(src:r)
25223 }
25224}
25225
25226/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25227///
25228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
25229#[inline]
25230#[target_feature(enable = "avx512f,avx512vl")]
25231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25232#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25233#[rustc_legacy_const_generics(4)]
25234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25235pub const fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
25236 src: __m256i,
25237 k: __mmask8,
25238 a: __m256i,
25239 b: __m256i,
25240) -> __m256i {
25241 unsafe {
25242 static_assert_uimm_bits!(MASK, 8);
25243 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
25244 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
25245 }
25246}
25247
25248/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25254#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25255#[rustc_legacy_const_generics(3)]
25256#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25257pub const fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
25258 k: __mmask8,
25259 a: __m256i,
25260 b: __m256i,
25261) -> __m256i {
25262 unsafe {
25263 static_assert_uimm_bits!(MASK, 8);
25264 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
25265 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
25266 }
25267}
25268
25269/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
25270///
25271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
25272#[inline]
25273#[target_feature(enable = "avx512f")]
25274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25275#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25276#[rustc_legacy_const_generics(2)]
25277#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25278pub const fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
25279 unsafe {
25280 static_assert_uimm_bits!(MASK, 8);
25281 let a: Simd = a.as_i64x8();
25282 let b: Simd = b.as_i64x8();
25283 let r: i64x8 = simd_shuffle!(
25284 a,
25285 b,
25286 [
25287 (MASK as u32 & 0b11) * 2 + 0,
25288 (MASK as u32 & 0b11) * 2 + 1,
25289 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
25290 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
25291 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
25292 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
25293 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
25294 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
25295 ],
25296 );
25297 transmute(src:r)
25298 }
25299}
25300
25301/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25302///
25303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
25304#[inline]
25305#[target_feature(enable = "avx512f")]
25306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25307#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25308#[rustc_legacy_const_generics(4)]
25309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25310pub const fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
25311 src: __m512i,
25312 k: __mmask8,
25313 a: __m512i,
25314 b: __m512i,
25315) -> __m512i {
25316 unsafe {
25317 static_assert_uimm_bits!(MASK, 8);
25318 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
25319 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
25320 }
25321}
25322
25323/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25324///
25325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
25326#[inline]
25327#[target_feature(enable = "avx512f")]
25328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25329#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25330#[rustc_legacy_const_generics(3)]
25331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25332pub const fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
25333 k: __mmask8,
25334 a: __m512i,
25335 b: __m512i,
25336) -> __m512i {
25337 unsafe {
25338 static_assert_uimm_bits!(MASK, 8);
25339 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
25340 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
25341 }
25342}
25343
25344/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
25345///
25346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
25347#[inline]
25348#[target_feature(enable = "avx512f,avx512vl")]
25349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25350#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
25351#[rustc_legacy_const_generics(2)]
25352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25353pub const fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
25354 unsafe {
25355 static_assert_uimm_bits!(MASK, 8);
25356 let a: Simd = a.as_i64x4();
25357 let b: Simd = b.as_i64x4();
25358 let r: i64x4 = simd_shuffle!(
25359 a,
25360 b,
25361 [
25362 (MASK as u32 & 0b1) * 2 + 0,
25363 (MASK as u32 & 0b1) * 2 + 1,
25364 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
25365 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
25366 ],
25367 );
25368 transmute(src:r)
25369 }
25370}
25371
25372/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25373///
25374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
25375#[inline]
25376#[target_feature(enable = "avx512f,avx512vl")]
25377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25378#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25379#[rustc_legacy_const_generics(4)]
25380#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25381pub const fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
25382 src: __m256i,
25383 k: __mmask8,
25384 a: __m256i,
25385 b: __m256i,
25386) -> __m256i {
25387 unsafe {
25388 static_assert_uimm_bits!(MASK, 8);
25389 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
25390 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
25391 }
25392}
25393
25394/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25395///
25396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
25397#[inline]
25398#[target_feature(enable = "avx512f,avx512vl")]
25399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25400#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25401#[rustc_legacy_const_generics(3)]
25402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25403pub const fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
25404 k: __mmask8,
25405 a: __m256i,
25406 b: __m256i,
25407) -> __m256i {
25408 unsafe {
25409 static_assert_uimm_bits!(MASK, 8);
25410 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
25411 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
25412 }
25413}
25414
25415/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25416///
25417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
25418#[inline]
25419#[target_feature(enable = "avx512f")]
25420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25421#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2
25422#[rustc_legacy_const_generics(2)]
25423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // imm8 control must fit in 8 bits (four 2-bit lane selectors).
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Each 2-bit field of MASK selects one 128-bit lane (4 x f32). The two
        // low fields pick lanes of `a`; the two high fields pick lanes of `b`,
        // hence the `+ 16` offset into the concatenated [a, b] index space.
        let r: f32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25454
25455/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25456///
25457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
25458#[inline]
25459#[target_feature(enable = "avx512f")]
25460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25461#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25462#[rustc_legacy_const_generics(4)]
25463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25464pub const fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
25465 src: __m512,
25466 k: __mmask16,
25467 a: __m512,
25468 b: __m512,
25469) -> __m512 {
25470 unsafe {
25471 static_assert_uimm_bits!(MASK, 8);
25472 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
25473 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
25474 }
25475}
25476
25477/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25478///
25479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
25480#[inline]
25481#[target_feature(enable = "avx512f")]
25482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25483#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25484#[rustc_legacy_const_generics(3)]
25485#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25486pub const fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
25487 k: __mmask16,
25488 a: __m512,
25489 b: __m512,
25490) -> __m512 {
25491 unsafe {
25492 static_assert_uimm_bits!(MASK, 8);
25493 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
25494 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
25495 }
25496}
25497
25498/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25499///
25500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
25501#[inline]
25502#[target_feature(enable = "avx512f,avx512vl")]
25503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25504#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
25505#[rustc_legacy_const_generics(2)]
25506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25507pub const fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
25508 unsafe {
25509 static_assert_uimm_bits!(MASK, 8);
25510 let a: Simd = a.as_f32x8();
25511 let b: Simd = b.as_f32x8();
25512 let r: f32x8 = simd_shuffle!(
25513 a,
25514 b,
25515 [
25516 (MASK as u32 & 0b1) * 4 + 0,
25517 (MASK as u32 & 0b1) * 4 + 1,
25518 (MASK as u32 & 0b1) * 4 + 2,
25519 (MASK as u32 & 0b1) * 4 + 3,
25520 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
25521 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
25522 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
25523 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
25524 ],
25525 );
25526 transmute(src:r)
25527 }
25528}
25529
25530/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25531///
25532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
25533#[inline]
25534#[target_feature(enable = "avx512f,avx512vl")]
25535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25536#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
25537#[rustc_legacy_const_generics(4)]
25538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25539pub const fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
25540 src: __m256,
25541 k: __mmask8,
25542 a: __m256,
25543 b: __m256,
25544) -> __m256 {
25545 unsafe {
25546 static_assert_uimm_bits!(MASK, 8);
25547 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
25548 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
25549 }
25550}
25551
25552/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25553///
25554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
25555#[inline]
25556#[target_feature(enable = "avx512f,avx512vl")]
25557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25558#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
25559#[rustc_legacy_const_generics(3)]
25560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25561pub const fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
25562 k: __mmask8,
25563 a: __m256,
25564 b: __m256,
25565) -> __m256 {
25566 unsafe {
25567 static_assert_uimm_bits!(MASK, 8);
25568 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
25569 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
25570 }
25571}
25572
25573/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25574///
25575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
25576#[inline]
25577#[target_feature(enable = "avx512f")]
25578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25579#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25580#[rustc_legacy_const_generics(2)]
25581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25582pub const fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
25583 unsafe {
25584 static_assert_uimm_bits!(MASK, 8);
25585 let a: Simd = a.as_f64x8();
25586 let b: Simd = b.as_f64x8();
25587 let r: f64x8 = simd_shuffle!(
25588 a,
25589 b,
25590 [
25591 (MASK as u32 & 0b11) * 2 + 0,
25592 (MASK as u32 & 0b11) * 2 + 1,
25593 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
25594 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
25595 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
25596 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
25597 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
25598 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
25599 ],
25600 );
25601 transmute(src:r)
25602 }
25603}
25604
25605/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25606///
25607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
25608#[inline]
25609#[target_feature(enable = "avx512f")]
25610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25611#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25612#[rustc_legacy_const_generics(4)]
25613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25614pub const fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
25615 src: __m512d,
25616 k: __mmask8,
25617 a: __m512d,
25618 b: __m512d,
25619) -> __m512d {
25620 unsafe {
25621 static_assert_uimm_bits!(MASK, 8);
25622 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
25623 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
25624 }
25625}
25626
25627/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25628///
25629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
25630#[inline]
25631#[target_feature(enable = "avx512f")]
25632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25633#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25634#[rustc_legacy_const_generics(3)]
25635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25636pub const fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
25637 k: __mmask8,
25638 a: __m512d,
25639 b: __m512d,
25640) -> __m512d {
25641 unsafe {
25642 static_assert_uimm_bits!(MASK, 8);
25643 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
25644 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
25645 }
25646}
25647
25648/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25649///
25650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
25651#[inline]
25652#[target_feature(enable = "avx512f,avx512vl")]
25653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25654#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
25655#[rustc_legacy_const_generics(2)]
25656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25657pub const fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
25658 unsafe {
25659 static_assert_uimm_bits!(MASK, 8);
25660 let a: Simd = a.as_f64x4();
25661 let b: Simd = b.as_f64x4();
25662 let r: f64x4 = simd_shuffle!(
25663 a,
25664 b,
25665 [
25666 (MASK as u32 & 0b1) * 2 + 0,
25667 (MASK as u32 & 0b1) * 2 + 1,
25668 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
25669 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
25670 ],
25671 );
25672 transmute(src:r)
25673 }
25674}
25675
25676/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25677///
25678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
25679#[inline]
25680#[target_feature(enable = "avx512f,avx512vl")]
25681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25682#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
25683#[rustc_legacy_const_generics(4)]
25684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25685pub const fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
25686 src: __m256d,
25687 k: __mmask8,
25688 a: __m256d,
25689 b: __m256d,
25690) -> __m256d {
25691 unsafe {
25692 static_assert_uimm_bits!(MASK, 8);
25693 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
25694 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
25695 }
25696}
25697
25698/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25699///
25700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
25701#[inline]
25702#[target_feature(enable = "avx512f,avx512vl")]
25703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25704#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
25705#[rustc_legacy_const_generics(3)]
25706#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25707pub const fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
25708 k: __mmask8,
25709 a: __m256d,
25710 b: __m256d,
25711) -> __m256d {
25712 unsafe {
25713 static_assert_uimm_bits!(MASK, 8);
25714 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
25715 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
25716 }
25717}
25718
25719/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25720///
25721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
25722#[inline]
25723#[target_feature(enable = "avx512f")]
25724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25725#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25726#[rustc_legacy_const_generics(1)]
25727#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
    unsafe {
        // Only the low 2 bits of the immediate select a lane.
        static_assert_uimm_bits!(IMM8, 2);
        // Each arm extracts one aligned 128-bit lane (4 x f32). The second
        // shuffle operand only satisfies the two-input form of `simd_shuffle!`;
        // no index ever reads from it.
        match IMM8 & 0x3 {
            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
        }
    }
}
25739
25740/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25741///
25742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
25743#[inline]
25744#[target_feature(enable = "avx512f")]
25745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25746#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25747#[rustc_legacy_const_generics(3)]
25748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25749pub const fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
25750 src: __m128,
25751 k: __mmask8,
25752 a: __m512,
25753) -> __m128 {
25754 unsafe {
25755 static_assert_uimm_bits!(IMM8, 2);
25756 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
25757 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
25758 }
25759}
25760
25761/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25762///
25763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
25764#[inline]
25765#[target_feature(enable = "avx512f")]
25766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25767#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25768#[rustc_legacy_const_generics(2)]
25769#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25770pub const fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
25771 unsafe {
25772 static_assert_uimm_bits!(IMM8, 2);
25773 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
25774 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
25775 }
25776}
25777
25778/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25779///
25780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
25781#[inline]
25782#[target_feature(enable = "avx512f,avx512vl")]
25783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25784#[cfg_attr(
25785 test,
25786 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
25787)]
25788#[rustc_legacy_const_generics(1)]
25789#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
    unsafe {
        // Only the low bit of the immediate selects a lane.
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the low (0) or high (1) 128-bit lane; the second shuffle
        // operand is never read (two-input form of `simd_shuffle!` only).
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
        }
    }
}
25799
25800/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25801///
25802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
25803#[inline]
25804#[target_feature(enable = "avx512f,avx512vl")]
25805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25806#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
25807#[rustc_legacy_const_generics(3)]
25808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25809pub const fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
25810 src: __m128,
25811 k: __mmask8,
25812 a: __m256,
25813) -> __m128 {
25814 unsafe {
25815 static_assert_uimm_bits!(IMM8, 1);
25816 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
25817 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
25818 }
25819}
25820
25821/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25822///
25823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
25824#[inline]
25825#[target_feature(enable = "avx512f,avx512vl")]
25826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25827#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
25828#[rustc_legacy_const_generics(2)]
25829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25830pub const fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
25831 unsafe {
25832 static_assert_uimm_bits!(IMM8, 1);
25833 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
25834 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
25835 }
25836}
25837
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
    unsafe {
        // IMM1 is validated to a single bit: 0 selects the low 256-bit half,
        // 1 the high half. The second shuffle operand (zero vector) is never
        // referenced by the index lists below; it only satisfies the macro's arity.
        static_assert_uimm_bits!(IMM1, 1);
        match IMM1 {
            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
        }
    }
}
25859
25860/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25861///
25862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
25863#[inline]
25864#[target_feature(enable = "avx512f")]
25865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25866#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25867#[rustc_legacy_const_generics(3)]
25868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25869pub const fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
25870 src: __m256i,
25871 k: __mmask8,
25872 a: __m512i,
25873) -> __m256i {
25874 unsafe {
25875 static_assert_uimm_bits!(IMM1, 1);
25876 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
25877 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
25878 }
25879}
25880
25881/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25882///
25883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
25884#[inline]
25885#[target_feature(enable = "avx512f")]
25886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25887#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25888#[rustc_legacy_const_generics(2)]
25889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25890pub const fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
25891 unsafe {
25892 static_assert_uimm_bits!(IMM1, 1);
25893 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
25894 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
25895 }
25896}
25897
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
    unsafe {
        // IMM8 bit 0 selects the low (0) or high (1) 256-bit half. The
        // undefined second operand is never referenced by the index lists.
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
        }
    }
}
25916
25917/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25918///
25919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25920#[inline]
25921#[target_feature(enable = "avx512f")]
25922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25923#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25924#[rustc_legacy_const_generics(3)]
25925#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25926pub const fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25927 src: __m256d,
25928 k: __mmask8,
25929 a: __m512d,
25930) -> __m256d {
25931 unsafe {
25932 static_assert_uimm_bits!(IMM8, 1);
25933 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
25934 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
25935 }
25936}
25937
25938/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25939///
25940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25941#[inline]
25942#[target_feature(enable = "avx512f")]
25943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25944#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25945#[rustc_legacy_const_generics(2)]
25946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25947pub const fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25948 unsafe {
25949 static_assert_uimm_bits!(IMM8, 1);
25950 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
25951 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
25952 }
25953}
25954
25955/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25956///
25957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25958#[inline]
25959#[target_feature(enable = "avx512f")]
25960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25961#[cfg_attr(
25962 test,
25963 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25964)]
25965#[rustc_legacy_const_generics(1)]
25966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25967pub const fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25968 unsafe {
25969 static_assert_uimm_bits!(IMM2, 2);
25970 let a: Simd = a.as_i32x16();
25971 let zero: Simd = i32x16::ZERO;
25972 let extract: i32x4 = match IMM2 {
25973 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25974 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25975 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25976 _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25977 };
25978 transmute(src:extract)
25979 }
25980}
25981
25982/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25983///
25984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25985#[inline]
25986#[target_feature(enable = "avx512f")]
25987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25988#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25989#[rustc_legacy_const_generics(3)]
25990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25991pub const fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25992 src: __m128i,
25993 k: __mmask8,
25994 a: __m512i,
25995) -> __m128i {
25996 unsafe {
25997 static_assert_uimm_bits!(IMM2, 2);
25998 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
25999 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
26000 }
26001}
26002
26003/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26004///
26005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
26006#[inline]
26007#[target_feature(enable = "avx512f")]
26008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26009#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
26010#[rustc_legacy_const_generics(2)]
26011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26012pub const fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
26013 unsafe {
26014 static_assert_uimm_bits!(IMM2, 2);
26015 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
26016 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
26017 }
26018}
26019
26020/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
26021///
26022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
26023#[inline]
26024#[target_feature(enable = "avx512f,avx512vl")]
26025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26026#[cfg_attr(
26027 test,
26028 assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
26029)]
26030#[rustc_legacy_const_generics(1)]
26031#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26032pub const fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
26033 unsafe {
26034 static_assert_uimm_bits!(IMM1, 1);
26035 let a: Simd = a.as_i32x8();
26036 let zero: Simd = i32x8::ZERO;
26037 let extract: i32x4 = match IMM1 {
26038 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
26039 _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
26040 };
26041 transmute(src:extract)
26042 }
26043}
26044
26045/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26046///
26047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
26048#[inline]
26049#[target_feature(enable = "avx512f,avx512vl")]
26050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26051#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
26052#[rustc_legacy_const_generics(3)]
26053#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26054pub const fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
26055 src: __m128i,
26056 k: __mmask8,
26057 a: __m256i,
26058) -> __m128i {
26059 unsafe {
26060 static_assert_uimm_bits!(IMM1, 1);
26061 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
26062 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
26063 }
26064}
26065
26066/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26067///
26068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
26069#[inline]
26070#[target_feature(enable = "avx512f,avx512vl")]
26071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26072#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
26073#[rustc_legacy_const_generics(2)]
26074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26075pub const fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
26076 unsafe {
26077 static_assert_uimm_bits!(IMM1, 1);
26078 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
26079 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
26080 }
26081}
26082
26083/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
26084///
26085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
26086#[inline]
26087#[target_feature(enable = "avx512f")]
26088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26089#[cfg_attr(test, assert_instr(vmovsldup))]
26090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26091pub const fn _mm512_moveldup_ps(a: __m512) -> __m512 {
26092 unsafe {
26093 let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26094 transmute(src:r)
26095 }
26096}
26097
26098/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26104#[cfg_attr(test, assert_instr(vmovsldup))]
26105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26106pub const fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26107 unsafe {
26108 let mov: f32x16 =
26109 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26110 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x16()))
26111 }
26112}
26113
26114/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26115///
26116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
26117#[inline]
26118#[target_feature(enable = "avx512f")]
26119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26120#[cfg_attr(test, assert_instr(vmovsldup))]
26121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26122pub const fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
26123 unsafe {
26124 let mov: f32x16 =
26125 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26126 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x16::ZERO))
26127 }
26128}
26129
26130/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26131///
26132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
26133#[inline]
26134#[target_feature(enable = "avx512f,avx512vl")]
26135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26136#[cfg_attr(test, assert_instr(vmovsldup))]
26137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26138pub const fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
26139 unsafe {
26140 let mov: __m256 = _mm256_moveldup_ps(a);
26141 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:src.as_f32x8()))
26142 }
26143}
26144
26145/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26146///
26147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
26148#[inline]
26149#[target_feature(enable = "avx512f,avx512vl")]
26150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26151#[cfg_attr(test, assert_instr(vmovsldup))]
26152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26153pub const fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
26154 unsafe {
26155 let mov: __m256 = _mm256_moveldup_ps(a);
26156 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:f32x8::ZERO))
26157 }
26158}
26159
26160/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26166#[cfg_attr(test, assert_instr(vmovsldup))]
26167#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26168pub const fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
26169 unsafe {
26170 let mov: __m128 = _mm_moveldup_ps(a);
26171 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:src.as_f32x4()))
26172 }
26173}
26174
26175/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26176///
26177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
26178#[inline]
26179#[target_feature(enable = "avx512f,avx512vl")]
26180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26181#[cfg_attr(test, assert_instr(vmovsldup))]
26182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26183pub const fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
26184 unsafe {
26185 let mov: __m128 = _mm_moveldup_ps(a);
26186 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:f32x4::ZERO))
26187 }
26188}
26189
26190/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
26191///
26192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
26193#[inline]
26194#[target_feature(enable = "avx512f")]
26195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26196#[cfg_attr(test, assert_instr(vmovshdup))]
26197#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26198pub const fn _mm512_movehdup_ps(a: __m512) -> __m512 {
26199 unsafe {
26200 let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26201 transmute(src:r)
26202 }
26203}
26204
26205/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26206///
26207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
26208#[inline]
26209#[target_feature(enable = "avx512f")]
26210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26211#[cfg_attr(test, assert_instr(vmovshdup))]
26212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26213pub const fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26214 unsafe {
26215 let mov: f32x16 =
26216 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26217 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x16()))
26218 }
26219}
26220
26221/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26222///
26223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
26224#[inline]
26225#[target_feature(enable = "avx512f")]
26226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26227#[cfg_attr(test, assert_instr(vmovshdup))]
26228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26229pub const fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
26230 unsafe {
26231 let mov: f32x16 =
26232 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26233 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x16::ZERO))
26234 }
26235}
26236
26237/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26238///
26239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
26240#[inline]
26241#[target_feature(enable = "avx512f,avx512vl")]
26242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26243#[cfg_attr(test, assert_instr(vmovshdup))]
26244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26245pub const fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
26246 unsafe {
26247 let mov: __m256 = _mm256_movehdup_ps(a);
26248 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:src.as_f32x8()))
26249 }
26250}
26251
26252/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26253///
26254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
26255#[inline]
26256#[target_feature(enable = "avx512f,avx512vl")]
26257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26258#[cfg_attr(test, assert_instr(vmovshdup))]
26259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26260pub const fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
26261 unsafe {
26262 let mov: __m256 = _mm256_movehdup_ps(a);
26263 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:f32x8::ZERO))
26264 }
26265}
26266
26267/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26268///
26269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
26270#[inline]
26271#[target_feature(enable = "avx512f,avx512vl")]
26272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26273#[cfg_attr(test, assert_instr(vmovshdup))]
26274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26275pub const fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
26276 unsafe {
26277 let mov: __m128 = _mm_movehdup_ps(a);
26278 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:src.as_f32x4()))
26279 }
26280}
26281
26282/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26283///
26284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
26285#[inline]
26286#[target_feature(enable = "avx512f,avx512vl")]
26287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26288#[cfg_attr(test, assert_instr(vmovshdup))]
26289#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26290pub const fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
26291 unsafe {
26292 let mov: __m128 = _mm_movehdup_ps(a);
26293 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:f32x4::ZERO))
26294 }
26295}
26296
26297/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
26298///
26299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
26300#[inline]
26301#[target_feature(enable = "avx512f")]
26302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26303#[cfg_attr(test, assert_instr(vmovddup))]
26304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26305pub const fn _mm512_movedup_pd(a: __m512d) -> __m512d {
26306 unsafe {
26307 let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26308 transmute(src:r)
26309 }
26310}
26311
26312/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26313///
26314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
26315#[inline]
26316#[target_feature(enable = "avx512f")]
26317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26318#[cfg_attr(test, assert_instr(vmovddup))]
26319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26320pub const fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
26321 unsafe {
26322 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26323 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x8()))
26324 }
26325}
26326
26327/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26328///
26329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
26330#[inline]
26331#[target_feature(enable = "avx512f")]
26332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26333#[cfg_attr(test, assert_instr(vmovddup))]
26334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26335pub const fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
26336 unsafe {
26337 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26338 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x8::ZERO))
26339 }
26340}
26341
26342/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26348#[cfg_attr(test, assert_instr(vmovddup))]
26349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26350pub const fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
26351 unsafe {
26352 let mov: __m256d = _mm256_movedup_pd(a);
26353 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x4(), no:src.as_f64x4()))
26354 }
26355}
26356
26357/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26358///
26359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
26360#[inline]
26361#[target_feature(enable = "avx512f,avx512vl")]
26362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26363#[cfg_attr(test, assert_instr(vmovddup))]
26364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26365pub const fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
26366 unsafe {
26367 let mov: __m256d = _mm256_movedup_pd(a);
26368 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x4(), no:f64x4::ZERO))
26369 }
26370}
26371
26372/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26373///
26374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
26375#[inline]
26376#[target_feature(enable = "avx512f,avx512vl")]
26377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26378#[cfg_attr(test, assert_instr(vmovddup))]
26379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26380pub const fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
26381 unsafe {
26382 let mov: __m128d = _mm_movedup_pd(a);
26383 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x2(), no:src.as_f64x2()))
26384 }
26385}
26386
26387/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26388///
26389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
26390#[inline]
26391#[target_feature(enable = "avx512f,avx512vl")]
26392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26393#[cfg_attr(test, assert_instr(vmovddup))]
26394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26395pub const fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
26396 unsafe {
26397 let mov: __m128d = _mm_movedup_pd(a);
26398 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x2(), no:f64x2::ZERO))
26399 }
26400}
26401
/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i32x16();
        // Widen `b` to 512 bits so its four elements are addressable as
        // shuffle indices 16..=19 (the upper bits are never selected).
        let b = _mm512_castsi128_si512(b).as_i32x16();
        // IMM8 selects which 128-bit lane of `a` is replaced by `b`.
        let ret: i32x16 = match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        };
        transmute(ret)
    }
}
26445
26446/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26447///
26448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
26449#[inline]
26450#[target_feature(enable = "avx512f")]
26451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26452#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26453#[rustc_legacy_const_generics(4)]
26454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26455pub const fn _mm512_mask_inserti32x4<const IMM8: i32>(
26456 src: __m512i,
26457 k: __mmask16,
26458 a: __m512i,
26459 b: __m128i,
26460) -> __m512i {
26461 unsafe {
26462 static_assert_uimm_bits!(IMM8, 2);
26463 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
26464 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
26465 }
26466}
26467
26468/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26469///
26470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
26471#[inline]
26472#[target_feature(enable = "avx512f")]
26473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26474#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26475#[rustc_legacy_const_generics(3)]
26476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26477pub const fn _mm512_maskz_inserti32x4<const IMM8: i32>(
26478 k: __mmask16,
26479 a: __m512i,
26480 b: __m128i,
26481) -> __m512i {
26482 unsafe {
26483 static_assert_uimm_bits!(IMM8, 2);
26484 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
26485 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
26486 }
26487}
26488
26489/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
26490///
26491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
26492#[inline]
26493#[target_feature(enable = "avx512f,avx512vl")]
26494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26495#[cfg_attr(
26496 test,
26497 assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
26498)]
26499#[rustc_legacy_const_generics(2)]
26500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26501pub const fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
26502 unsafe {
26503 static_assert_uimm_bits!(IMM8, 1);
26504 let a: Simd = a.as_i32x8();
26505 let b: Simd = _mm256_castsi128_si256(b).as_i32x8();
26506 let ret: i32x8 = match IMM8 & 0b1 {
26507 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
26508 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
26509 };
26510 transmute(src:ret)
26511 }
26512}
26513
26514/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26515///
26516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
26517#[inline]
26518#[target_feature(enable = "avx512f,avx512vl")]
26519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26520#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26521#[rustc_legacy_const_generics(4)]
26522#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26523pub const fn _mm256_mask_inserti32x4<const IMM8: i32>(
26524 src: __m256i,
26525 k: __mmask8,
26526 a: __m256i,
26527 b: __m128i,
26528) -> __m256i {
26529 unsafe {
26530 static_assert_uimm_bits!(IMM8, 1);
26531 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
26532 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
26533 }
26534}
26535
26536/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26537///
26538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
26539#[inline]
26540#[target_feature(enable = "avx512f,avx512vl")]
26541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26542#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26543#[rustc_legacy_const_generics(3)]
26544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26545pub const fn _mm256_maskz_inserti32x4<const IMM8: i32>(
26546 k: __mmask8,
26547 a: __m256i,
26548 b: __m128i,
26549) -> __m256i {
26550 unsafe {
26551 static_assert_uimm_bits!(IMM8, 1);
26552 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
26553 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
26554 }
26555}
26556
/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have 8 lanes;
        // shuffle indices 8..=11 then refer to the four elements of `b`.
        let b: __m512i = _mm512_castsi256_si512(b);
        // IMM8 bit 0 selects which 256-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26576
26577/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26578///
26579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
26580#[inline]
26581#[target_feature(enable = "avx512f")]
26582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26583#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26584#[rustc_legacy_const_generics(4)]
26585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26586pub const fn _mm512_mask_inserti64x4<const IMM8: i32>(
26587 src: __m512i,
26588 k: __mmask8,
26589 a: __m512i,
26590 b: __m256i,
26591) -> __m512i {
26592 unsafe {
26593 static_assert_uimm_bits!(IMM8, 1);
26594 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
26595 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
26596 }
26597}
26598
26599/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26600///
26601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
26602#[inline]
26603#[target_feature(enable = "avx512f")]
26604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26605#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26606#[rustc_legacy_const_generics(3)]
26607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26608pub const fn _mm512_maskz_inserti64x4<const IMM8: i32>(
26609 k: __mmask8,
26610 a: __m512i,
26611 b: __m256i,
26612) -> __m512i {
26613 unsafe {
26614 static_assert_uimm_bits!(IMM8, 1);
26615 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
26616 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
26617 }
26618}
26619
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Widen `b` to 512 bits so both shuffle operands have 16 lanes;
        // shuffle indices 16..=19 then refer to the four elements of `b`.
        let b = _mm512_castps128_ps512(b);
        // IMM8 (2 bits) selects which 128-bit (4-element) chunk of `a` is
        // replaced by the contents of `b`.
        match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        }
    }
}
26661
26662/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26663///
26664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
26665#[inline]
26666#[target_feature(enable = "avx512f")]
26667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26668#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26669#[rustc_legacy_const_generics(4)]
26670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26671pub const fn _mm512_mask_insertf32x4<const IMM8: i32>(
26672 src: __m512,
26673 k: __mmask16,
26674 a: __m512,
26675 b: __m128,
26676) -> __m512 {
26677 unsafe {
26678 static_assert_uimm_bits!(IMM8, 2);
26679 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
26680 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
26681 }
26682}
26683
26684/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26685///
26686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
26687#[inline]
26688#[target_feature(enable = "avx512f")]
26689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26690#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26691#[rustc_legacy_const_generics(3)]
26692#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26693pub const fn _mm512_maskz_insertf32x4<const IMM8: i32>(
26694 k: __mmask16,
26695 a: __m512,
26696 b: __m128,
26697) -> __m512 {
26698 unsafe {
26699 static_assert_uimm_bits!(IMM8, 2);
26700 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
26701 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
26702 }
26703}
26704
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 256 bits so both shuffle operands have 8 lanes;
        // shuffle indices 8..=11 then refer to the four elements of `b`.
        let b: __m256 = _mm256_castps128_ps256(b);
        // IMM8 bit 0 selects which 128-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26727
26728/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26729///
26730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
26731#[inline]
26732#[target_feature(enable = "avx512f,avx512vl")]
26733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26734#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26735#[rustc_legacy_const_generics(4)]
26736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26737pub const fn _mm256_mask_insertf32x4<const IMM8: i32>(
26738 src: __m256,
26739 k: __mmask8,
26740 a: __m256,
26741 b: __m128,
26742) -> __m256 {
26743 unsafe {
26744 static_assert_uimm_bits!(IMM8, 1);
26745 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
26746 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
26747 }
26748}
26749
26750/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26751///
26752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
26753#[inline]
26754#[target_feature(enable = "avx512f,avx512vl")]
26755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26756#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26757#[rustc_legacy_const_generics(3)]
26758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26759pub const fn _mm256_maskz_insertf32x4<const IMM8: i32>(
26760 k: __mmask8,
26761 a: __m256,
26762 b: __m128,
26763) -> __m256 {
26764 unsafe {
26765 static_assert_uimm_bits!(IMM8, 1);
26766 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
26767 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
26768 }
26769}
26770
/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have 8 lanes;
        // shuffle indices 8..=11 then refer to the four elements of `b`.
        let b: __m512d = _mm512_castpd256_pd512(b);
        // IMM8 bit 0 selects which 256-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26790
26791/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26797#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
26798#[rustc_legacy_const_generics(4)]
26799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26800pub const fn _mm512_mask_insertf64x4<const IMM8: i32>(
26801 src: __m512d,
26802 k: __mmask8,
26803 a: __m512d,
26804 b: __m256d,
26805) -> __m512d {
26806 unsafe {
26807 static_assert_uimm_bits!(IMM8, 1);
26808 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
26809 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
26810 }
26811}
26812
26813/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26814///
26815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
26816#[inline]
26817#[target_feature(enable = "avx512f")]
26818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26819#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
26820#[rustc_legacy_const_generics(3)]
26821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26822pub const fn _mm512_maskz_insertf64x4<const IMM8: i32>(
26823 k: __mmask8,
26824 a: __m512d,
26825 b: __m256d,
26826) -> __m512d {
26827 unsafe {
26828 static_assert_uimm_bits!(IMM8, 1);
26829 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
26830 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
26831 }
26832}
26833
26834/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
26835///
26836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
26837#[inline]
26838#[target_feature(enable = "avx512f")]
26839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26840#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
26841#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26842pub const fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
26843 unsafe {
26844 let a: Simd = a.as_i32x16();
26845 let b: Simd = b.as_i32x16();
26846 #[rustfmt::skip]
26847 let r: i32x16 = simd_shuffle!(
26848 a, b,
26849 [ 2, 18, 3, 19,
26850 2 + 4, 18 + 4, 3 + 4, 19 + 4,
26851 2 + 8, 18 + 8, 3 + 8, 19 + 8,
26852 2 + 12, 18 + 12, 3 + 12, 19 + 12],
26853 );
26854 transmute(src:r)
26855 }
26856}
26857
26858/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26859///
26860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
26861#[inline]
26862#[target_feature(enable = "avx512f")]
26863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26864#[cfg_attr(test, assert_instr(vpunpckhdq))]
26865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26866pub const fn _mm512_mask_unpackhi_epi32(
26867 src: __m512i,
26868 k: __mmask16,
26869 a: __m512i,
26870 b: __m512i,
26871) -> __m512i {
26872 unsafe {
26873 let unpackhi: Simd = _mm512_unpackhi_epi32(a, b).as_i32x16();
26874 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i32x16()))
26875 }
26876}
26877
26878/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26879///
26880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
26881#[inline]
26882#[target_feature(enable = "avx512f")]
26883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26884#[cfg_attr(test, assert_instr(vpunpckhdq))]
26885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26886pub const fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26887 unsafe {
26888 let unpackhi: Simd = _mm512_unpackhi_epi32(a, b).as_i32x16();
26889 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i32x16::ZERO))
26890 }
26891}
26892
26893/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26894///
26895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
26896#[inline]
26897#[target_feature(enable = "avx512f,avx512vl")]
26898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26899#[cfg_attr(test, assert_instr(vpunpckhdq))]
26900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26901pub const fn _mm256_mask_unpackhi_epi32(
26902 src: __m256i,
26903 k: __mmask8,
26904 a: __m256i,
26905 b: __m256i,
26906) -> __m256i {
26907 unsafe {
26908 let unpackhi: Simd = _mm256_unpackhi_epi32(a, b).as_i32x8();
26909 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i32x8()))
26910 }
26911}
26912
26913/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26914///
26915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
26916#[inline]
26917#[target_feature(enable = "avx512f,avx512vl")]
26918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26919#[cfg_attr(test, assert_instr(vpunpckhdq))]
26920#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26921pub const fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26922 unsafe {
26923 let unpackhi: Simd = _mm256_unpackhi_epi32(a, b).as_i32x8();
26924 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i32x8::ZERO))
26925 }
26926}
26927
26928/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26929///
26930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
26931#[inline]
26932#[target_feature(enable = "avx512f,avx512vl")]
26933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26934#[cfg_attr(test, assert_instr(vpunpckhdq))]
26935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26936pub const fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26937 unsafe {
26938 let unpackhi: Simd = _mm_unpackhi_epi32(a, b).as_i32x4();
26939 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i32x4()))
26940 }
26941}
26942
26943/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26944///
26945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
26946#[inline]
26947#[target_feature(enable = "avx512f,avx512vl")]
26948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26949#[cfg_attr(test, assert_instr(vpunpckhdq))]
26950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26951pub const fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26952 unsafe {
26953 let unpackhi: Simd = _mm_unpackhi_epi32(a, b).as_i32x4();
26954 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i32x4::ZERO))
26955 }
26956}
26957
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
    // Per 128-bit lane: high 64-bit element of `a` (odd index) followed by the
    // matching high element of `b` (index 8 + odd, since `b` follows `a`).
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
26969
26970/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26971///
26972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
26973#[inline]
26974#[target_feature(enable = "avx512f")]
26975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26976#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26978pub const fn _mm512_mask_unpackhi_epi64(
26979 src: __m512i,
26980 k: __mmask8,
26981 a: __m512i,
26982 b: __m512i,
26983) -> __m512i {
26984 unsafe {
26985 let unpackhi: Simd = _mm512_unpackhi_epi64(a, b).as_i64x8();
26986 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i64x8()))
26987 }
26988}
26989
26990/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26991///
26992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26993#[inline]
26994#[target_feature(enable = "avx512f")]
26995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26996#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26997#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26998pub const fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26999 unsafe {
27000 let unpackhi: Simd = _mm512_unpackhi_epi64(a, b).as_i64x8();
27001 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i64x8::ZERO))
27002 }
27003}
27004
27005/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27006///
27007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
27008#[inline]
27009#[target_feature(enable = "avx512f,avx512vl")]
27010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27011#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27013pub const fn _mm256_mask_unpackhi_epi64(
27014 src: __m256i,
27015 k: __mmask8,
27016 a: __m256i,
27017 b: __m256i,
27018) -> __m256i {
27019 unsafe {
27020 let unpackhi: Simd = _mm256_unpackhi_epi64(a, b).as_i64x4();
27021 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i64x4()))
27022 }
27023}
27024
27025/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27026///
27027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
27028#[inline]
27029#[target_feature(enable = "avx512f,avx512vl")]
27030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27031#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27033pub const fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27034 unsafe {
27035 let unpackhi: Simd = _mm256_unpackhi_epi64(a, b).as_i64x4();
27036 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i64x4::ZERO))
27037 }
27038}
27039
27040/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27041///
27042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
27043#[inline]
27044#[target_feature(enable = "avx512f,avx512vl")]
27045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27046#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27047#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27048pub const fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27049 unsafe {
27050 let unpackhi: Simd = _mm_unpackhi_epi64(a, b).as_i64x2();
27051 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i64x2()))
27052 }
27053}
27054
27055/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27056///
27057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
27058#[inline]
27059#[target_feature(enable = "avx512f,avx512vl")]
27060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27061#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27063pub const fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27064 unsafe {
27065 let unpackhi: Simd = _mm_unpackhi_epi64(a, b).as_i64x2();
27066 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i64x2::ZERO))
27067 }
27068}
27069
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Per 128-bit lane: high elements of `a` (indices 2, 3 + lane offset)
        // interleaved with high elements of `b` (indices 18, 19 + lane offset;
        // 16 is added because `b` follows `a` in the shuffle index space).
        #[rustfmt::skip]
        simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
               2 + 4, 18 + 4, 3 + 4, 19 + 4,
               2 + 8, 18 + 8, 3 + 8, 19 + 8,
               2 + 12, 18 + 12, 3 + 12, 19 + 12],
        )
    }
}
27090
27091/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27092///
27093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
27094#[inline]
27095#[target_feature(enable = "avx512f")]
27096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27097#[cfg_attr(test, assert_instr(vunpckhps))]
27098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27099pub const fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
27100 unsafe {
27101 let unpackhi: Simd = _mm512_unpackhi_ps(a, b).as_f32x16();
27102 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f32x16()))
27103 }
27104}
27105
27106/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27107///
27108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
27109#[inline]
27110#[target_feature(enable = "avx512f")]
27111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27112#[cfg_attr(test, assert_instr(vunpckhps))]
27113#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27114pub const fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27115 unsafe {
27116 let unpackhi: Simd = _mm512_unpackhi_ps(a, b).as_f32x16();
27117 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f32x16::ZERO))
27118 }
27119}
27120
27121/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27122///
27123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
27124#[inline]
27125#[target_feature(enable = "avx512f,avx512vl")]
27126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27127#[cfg_attr(test, assert_instr(vunpckhps))]
27128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27129pub const fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
27130 unsafe {
27131 let unpackhi: Simd = _mm256_unpackhi_ps(a, b).as_f32x8();
27132 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f32x8()))
27133 }
27134}
27135
27136/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27137///
27138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
27139#[inline]
27140#[target_feature(enable = "avx512f,avx512vl")]
27141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27142#[cfg_attr(test, assert_instr(vunpckhps))]
27143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27144pub const fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27145 unsafe {
27146 let unpackhi: Simd = _mm256_unpackhi_ps(a, b).as_f32x8();
27147 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f32x8::ZERO))
27148 }
27149}
27150
27151/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27152///
27153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
27154#[inline]
27155#[target_feature(enable = "avx512f,avx512vl")]
27156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27157#[cfg_attr(test, assert_instr(vunpckhps))]
27158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27159pub const fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
27160 unsafe {
27161 let unpackhi: Simd = _mm_unpackhi_ps(a, b).as_f32x4();
27162 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f32x4()))
27163 }
27164}
27165
27166/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
27169#[inline]
27170#[target_feature(enable = "avx512f,avx512vl")]
27171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27172#[cfg_attr(test, assert_instr(vunpckhps))]
27173#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27174pub const fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27175 unsafe {
27176 let unpackhi: Simd = _mm_unpackhi_ps(a, b).as_f32x4();
27177 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f32x4::ZERO))
27178 }
27179}
27180
27181/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
27182///
27183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
27184#[inline]
27185#[target_feature(enable = "avx512f")]
27186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27187#[cfg_attr(test, assert_instr(vunpckhpd))]
27188#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27189pub const fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
27190 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
27191}
27192
27193/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27194///
27195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
27196#[inline]
27197#[target_feature(enable = "avx512f")]
27198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27199#[cfg_attr(test, assert_instr(vunpckhpd))]
27200#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27201pub const fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27202 unsafe {
27203 let unpackhi: Simd = _mm512_unpackhi_pd(a, b).as_f64x8();
27204 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f64x8()))
27205 }
27206}
27207
27208/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27209///
27210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
27211#[inline]
27212#[target_feature(enable = "avx512f")]
27213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27214#[cfg_attr(test, assert_instr(vunpckhpd))]
27215#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27216pub const fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27217 unsafe {
27218 let unpackhi: Simd = _mm512_unpackhi_pd(a, b).as_f64x8();
27219 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f64x8::ZERO))
27220 }
27221}
27222
27223/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27224///
27225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
27226#[inline]
27227#[target_feature(enable = "avx512f,avx512vl")]
27228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27229#[cfg_attr(test, assert_instr(vunpckhpd))]
27230#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27231pub const fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27232 unsafe {
27233 let unpackhi: Simd = _mm256_unpackhi_pd(a, b).as_f64x4();
27234 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f64x4()))
27235 }
27236}
27237
27238/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27239///
27240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
27241#[inline]
27242#[target_feature(enable = "avx512f,avx512vl")]
27243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27244#[cfg_attr(test, assert_instr(vunpckhpd))]
27245#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27246pub const fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27247 unsafe {
27248 let unpackhi: Simd = _mm256_unpackhi_pd(a, b).as_f64x4();
27249 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f64x4::ZERO))
27250 }
27251}
27252
27253/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27254///
27255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
27256#[inline]
27257#[target_feature(enable = "avx512f,avx512vl")]
27258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27259#[cfg_attr(test, assert_instr(vunpckhpd))]
27260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27261pub const fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27262 unsafe {
27263 let unpackhi: Simd = _mm_unpackhi_pd(a, b).as_f64x2();
27264 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f64x2()))
27265 }
27266}
27267
27268/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27269///
27270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
27271#[inline]
27272#[target_feature(enable = "avx512f,avx512vl")]
27273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27274#[cfg_attr(test, assert_instr(vunpckhpd))]
27275#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27276pub const fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27277 unsafe {
27278 let unpackhi: Simd = _mm_unpackhi_pd(a, b).as_f64x2();
27279 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f64x2::ZERO))
27280 }
27281}
27282
27283/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
27284///
27285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
27286#[inline]
27287#[target_feature(enable = "avx512f")]
27288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27289#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
27290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27291pub const fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
27292 unsafe {
27293 let a: Simd = a.as_i32x16();
27294 let b: Simd = b.as_i32x16();
27295 #[rustfmt::skip]
27296 let r: i32x16 = simd_shuffle!(
27297 a, b,
27298 [ 0, 16, 1, 17,
27299 0 + 4, 16 + 4, 1 + 4, 17 + 4,
27300 0 + 8, 16 + 8, 1 + 8, 17 + 8,
27301 0 + 12, 16 + 12, 1 + 12, 17 + 12],
27302 );
27303 transmute(src:r)
27304 }
27305}
27306
27307/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27308///
27309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
27310#[inline]
27311#[target_feature(enable = "avx512f")]
27312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27313#[cfg_attr(test, assert_instr(vpunpckldq))]
27314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27315pub const fn _mm512_mask_unpacklo_epi32(
27316 src: __m512i,
27317 k: __mmask16,
27318 a: __m512i,
27319 b: __m512i,
27320) -> __m512i {
27321 unsafe {
27322 let unpacklo: Simd = _mm512_unpacklo_epi32(a, b).as_i32x16();
27323 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i32x16()))
27324 }
27325}
27326
27327/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27328///
27329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
27330#[inline]
27331#[target_feature(enable = "avx512f")]
27332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27333#[cfg_attr(test, assert_instr(vpunpckldq))]
27334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27335pub const fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27336 unsafe {
27337 let unpacklo: Simd = _mm512_unpacklo_epi32(a, b).as_i32x16();
27338 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i32x16::ZERO))
27339 }
27340}
27341
27342/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
27345#[inline]
27346#[target_feature(enable = "avx512f,avx512vl")]
27347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27348#[cfg_attr(test, assert_instr(vpunpckldq))]
27349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27350pub const fn _mm256_mask_unpacklo_epi32(
27351 src: __m256i,
27352 k: __mmask8,
27353 a: __m256i,
27354 b: __m256i,
27355) -> __m256i {
27356 unsafe {
27357 let unpacklo: Simd = _mm256_unpacklo_epi32(a, b).as_i32x8();
27358 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i32x8()))
27359 }
27360}
27361
27362/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27363///
27364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
27365#[inline]
27366#[target_feature(enable = "avx512f,avx512vl")]
27367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27368#[cfg_attr(test, assert_instr(vpunpckldq))]
27369#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27370pub const fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27371 unsafe {
27372 let unpacklo: Simd = _mm256_unpacklo_epi32(a, b).as_i32x8();
27373 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i32x8::ZERO))
27374 }
27375}
27376
27377/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27378///
27379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
27380#[inline]
27381#[target_feature(enable = "avx512f,avx512vl")]
27382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27383#[cfg_attr(test, assert_instr(vpunpckldq))]
27384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27385pub const fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27386 unsafe {
27387 let unpacklo: Simd = _mm_unpacklo_epi32(a, b).as_i32x4();
27388 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i32x4()))
27389 }
27390}
27391
27392/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27393///
27394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
27395#[inline]
27396#[target_feature(enable = "avx512f,avx512vl")]
27397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27398#[cfg_attr(test, assert_instr(vpunpckldq))]
27399#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27400pub const fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27401 unsafe {
27402 let unpacklo: Simd = _mm_unpacklo_epi32(a, b).as_i32x4();
27403 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i32x4::ZERO))
27404 }
27405}
27406
27407/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
27408///
27409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
27410#[inline]
27411#[target_feature(enable = "avx512f")]
27412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27413#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
27414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27415pub const fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
27416 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
27417}
27418
27419/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27420///
27421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
27422#[inline]
27423#[target_feature(enable = "avx512f")]
27424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27425#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27427pub const fn _mm512_mask_unpacklo_epi64(
27428 src: __m512i,
27429 k: __mmask8,
27430 a: __m512i,
27431 b: __m512i,
27432) -> __m512i {
27433 unsafe {
27434 let unpacklo: Simd = _mm512_unpacklo_epi64(a, b).as_i64x8();
27435 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i64x8()))
27436 }
27437}
27438
27439/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27440///
27441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
27442#[inline]
27443#[target_feature(enable = "avx512f")]
27444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27445#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27447pub const fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27448 unsafe {
27449 let unpacklo: Simd = _mm512_unpacklo_epi64(a, b).as_i64x8();
27450 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i64x8::ZERO))
27451 }
27452}
27453
27454/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27455///
27456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
27457#[inline]
27458#[target_feature(enable = "avx512f,avx512vl")]
27459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27460#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27462pub const fn _mm256_mask_unpacklo_epi64(
27463 src: __m256i,
27464 k: __mmask8,
27465 a: __m256i,
27466 b: __m256i,
27467) -> __m256i {
27468 unsafe {
27469 let unpacklo: Simd = _mm256_unpacklo_epi64(a, b).as_i64x4();
27470 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i64x4()))
27471 }
27472}
27473
27474/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27475///
27476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
27477#[inline]
27478#[target_feature(enable = "avx512f,avx512vl")]
27479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27480#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27482pub const fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27483 unsafe {
27484 let unpacklo: Simd = _mm256_unpacklo_epi64(a, b).as_i64x4();
27485 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i64x4::ZERO))
27486 }
27487}
27488
27489/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27490///
27491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
27492#[inline]
27493#[target_feature(enable = "avx512f,avx512vl")]
27494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27495#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27497pub const fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27498 unsafe {
27499 let unpacklo: Simd = _mm_unpacklo_epi64(a, b).as_i64x2();
27500 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i64x2()))
27501 }
27502}
27503
27504/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27505///
27506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
27507#[inline]
27508#[target_feature(enable = "avx512f,avx512vl")]
27509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27510#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27512pub const fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27513 unsafe {
27514 let unpacklo: Simd = _mm_unpacklo_epi64(a, b).as_i64x2();
27515 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i64x2::ZERO))
27516 }
27517}
27518
27519/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
27520///
27521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
27522#[inline]
27523#[target_feature(enable = "avx512f")]
27524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27525#[cfg_attr(test, assert_instr(vunpcklps))]
27526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27527pub const fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
27528 unsafe {
27529 #[rustfmt::skip]
27530 simd_shuffle!(a, b,
27531 [ 0, 16, 1, 17,
27532 0 + 4, 16 + 4, 1 + 4, 17 + 4,
27533 0 + 8, 16 + 8, 1 + 8, 17 + 8,
27534 0 + 12, 16 + 12, 1 + 12, 17 + 12],
27535 )
27536 }
27537}
27538
27539/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27540///
27541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
27542#[inline]
27543#[target_feature(enable = "avx512f")]
27544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27545#[cfg_attr(test, assert_instr(vunpcklps))]
27546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27547pub const fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
27548 unsafe {
27549 let unpacklo: Simd = _mm512_unpacklo_ps(a, b).as_f32x16();
27550 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f32x16()))
27551 }
27552}
27553
27554/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27555///
27556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
27557#[inline]
27558#[target_feature(enable = "avx512f")]
27559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27560#[cfg_attr(test, assert_instr(vunpcklps))]
27561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27562pub const fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27563 unsafe {
27564 let unpacklo: Simd = _mm512_unpacklo_ps(a, b).as_f32x16();
27565 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f32x16::ZERO))
27566 }
27567}
27568
27569/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27570///
27571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
27572#[inline]
27573#[target_feature(enable = "avx512f,avx512vl")]
27574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27575#[cfg_attr(test, assert_instr(vunpcklps))]
27576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27577pub const fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
27578 unsafe {
27579 let unpacklo: Simd = _mm256_unpacklo_ps(a, b).as_f32x8();
27580 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f32x8()))
27581 }
27582}
27583
27584/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27585///
27586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
27587#[inline]
27588#[target_feature(enable = "avx512f,avx512vl")]
27589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27590#[cfg_attr(test, assert_instr(vunpcklps))]
27591#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27592pub const fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27593 unsafe {
27594 let unpacklo: Simd = _mm256_unpacklo_ps(a, b).as_f32x8();
27595 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f32x8::ZERO))
27596 }
27597}
27598
27599/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27600///
27601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
27602#[inline]
27603#[target_feature(enable = "avx512f,avx512vl")]
27604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27605#[cfg_attr(test, assert_instr(vunpcklps))]
27606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27607pub const fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
27608 unsafe {
27609 let unpacklo: Simd = _mm_unpacklo_ps(a, b).as_f32x4();
27610 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f32x4()))
27611 }
27612}
27613
27614/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27615///
27616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
27617#[inline]
27618#[target_feature(enable = "avx512f,avx512vl")]
27619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27620#[cfg_attr(test, assert_instr(vunpcklps))]
27621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27622pub const fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27623 unsafe {
27624 let unpacklo: Simd = _mm_unpacklo_ps(a, b).as_f32x4();
27625 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f32x4::ZERO))
27626 }
27627}
27628
27629/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
27630///
27631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
27632#[inline]
27633#[target_feature(enable = "avx512f")]
27634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27635#[cfg_attr(test, assert_instr(vunpcklpd))]
27636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27637pub const fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
27638 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
27639}
27640
27641/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27642///
27643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
27644#[inline]
27645#[target_feature(enable = "avx512f")]
27646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27647#[cfg_attr(test, assert_instr(vunpcklpd))]
27648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27649pub const fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27650 unsafe {
27651 let unpacklo: Simd = _mm512_unpacklo_pd(a, b).as_f64x8();
27652 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f64x8()))
27653 }
27654}
27655
27656/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27657///
27658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
27659#[inline]
27660#[target_feature(enable = "avx512f")]
27661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27662#[cfg_attr(test, assert_instr(vunpcklpd))]
27663#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27664pub const fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27665 unsafe {
27666 let unpacklo: Simd = _mm512_unpacklo_pd(a, b).as_f64x8();
27667 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f64x8::ZERO))
27668 }
27669}
27670
27671/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
27674#[inline]
27675#[target_feature(enable = "avx512f,avx512vl")]
27676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27677#[cfg_attr(test, assert_instr(vunpcklpd))]
27678#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27679pub const fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27680 unsafe {
27681 let unpacklo: Simd = _mm256_unpacklo_pd(a, b).as_f64x4();
27682 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f64x4()))
27683 }
27684}
27685
27686/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27687///
27688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
27689#[inline]
27690#[target_feature(enable = "avx512f,avx512vl")]
27691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27692#[cfg_attr(test, assert_instr(vunpcklpd))]
27693#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27694pub const fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27695 unsafe {
27696 let unpacklo: Simd = _mm256_unpacklo_pd(a, b).as_f64x4();
27697 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f64x4::ZERO))
27698 }
27699}
27700
27701/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27702///
27703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
27704#[inline]
27705#[target_feature(enable = "avx512f,avx512vl")]
27706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27707#[cfg_attr(test, assert_instr(vunpcklpd))]
27708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27709pub const fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27710 unsafe {
27711 let unpacklo: Simd = _mm_unpacklo_pd(a, b).as_f64x2();
27712 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f64x2()))
27713 }
27714}
27715
27716/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27717///
27718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
27719#[inline]
27720#[target_feature(enable = "avx512f,avx512vl")]
27721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27722#[cfg_attr(test, assert_instr(vunpcklpd))]
27723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27724pub const fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27725 unsafe {
27726 let unpacklo: Simd = _mm_unpacklo_pd(a, b).as_f64x2();
27727 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f64x2::ZERO))
27728 }
27729}
27730
/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps128_ps512(a: __m128) -> __m512 {
    unsafe {
        // Indices 0..=3 keep the four lanes of `a`; the repeated index 4 reads
        // lane 0 of the undefined second operand, filling the upper 12 lanes
        // with unspecified-but-valid values.
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}
27753
/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps256_ps512(a: __m256) -> __m512 {
    unsafe {
        // Indices 0..=7 keep the eight lanes of `a`; the repeated index 8 reads
        // lane 0 of the undefined second operand for the upper 8 lanes.
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
27776
27777/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27778///
27779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
27780#[inline]
27781#[target_feature(enable = "avx512f")]
27782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27783#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27784pub const fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
27785 unsafe {
27786 simd_shuffle!(
27787 a,
27788 _mm_set1_ps(0.),
27789 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
27790 )
27791 }
27792}
27793
27794/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27795///
27796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
27797#[inline]
27798#[target_feature(enable = "avx512f")]
27799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27800#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27801pub const fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
27802 unsafe {
27803 simd_shuffle!(
27804 a,
27805 _mm256_set1_ps(0.),
27806 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
27807 )
27808 }
27809}
27810
/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps128(a: __m512) -> __m128 {
    // Truncating shuffle: keep only the low 4 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27821
/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps256(a: __m512) -> __m256 {
    // Truncating shuffle: keep only the low 8 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
27832
27833/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27834///
27835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
27836#[inline]
27837#[target_feature(enable = "avx512f")]
27838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27839#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27840pub const fn _mm512_castps_pd(a: __m512) -> __m512d {
27841 unsafe { transmute(src:a) }
27842}
27843
27844/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27845///
27846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
27847#[inline]
27848#[target_feature(enable = "avx512f")]
27849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27851pub const fn _mm512_castps_si512(a: __m512) -> __m512i {
27852 unsafe { transmute(src:a) }
27853}
27854
/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
    // Indices 0..=1 keep `a`; the repeated index 2 reads lane 0 of the
    // undefined second operand for the upper 6 lanes.
    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27871
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
    // Indices 0..=3 keep `a`; the repeated index 4 reads lane 0 of the
    // undefined second operand for the upper 4 lanes.
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27888
27889/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27890///
27891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
27892#[inline]
27893#[target_feature(enable = "avx512f")]
27894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27896pub const fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
27897 unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
27898}
27899
27900/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27901///
27902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
27903#[inline]
27904#[target_feature(enable = "avx512f")]
27905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27907pub const fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
27908 unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
27909}
27910
/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
    // Truncating shuffle: keep only the low 2 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
27921
/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
    // Truncating shuffle: keep only the low 4 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27932
27933/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27934///
27935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
27936#[inline]
27937#[target_feature(enable = "avx512f")]
27938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27939#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27940pub const fn _mm512_castpd_ps(a: __m512d) -> __m512 {
27941 unsafe { transmute(src:a) }
27942}
27943
27944/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27945///
27946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
27947#[inline]
27948#[target_feature(enable = "avx512f")]
27949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27951pub const fn _mm512_castpd_si512(a: __m512d) -> __m512i {
27952 unsafe { transmute(src:a) }
27953}
27954
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
    // Shuffled as i64 lanes: indices 0..=1 keep `a`; the repeated index 2
    // reads lane 0 of the undefined second operand for the upper 6 lanes.
    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27971
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
    // Shuffled as i64 lanes: indices 0..=3 keep `a`; the repeated index 4
    // reads lane 0 of the undefined second operand for the upper 4 lanes.
    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27988
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
    // Concatenate `a` with an all-zero vector; the repeated index 2 reads
    // lane 0 of the zero operand, clearing the upper 384 bits.
    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27999
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
    // Concatenate `a` with an all-zero vector; the repeated index 4 reads
    // lane 0 of the zero operand, clearing the upper 256 bits.
    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
28010
/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
    // Truncating shuffle: keep only the low 2 i64 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
28021
/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
    // Truncating shuffle: keep only the low 4 i64 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
28032
28033/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
28034///
28035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
28036#[inline]
28037#[target_feature(enable = "avx512f")]
28038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28039#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28040pub const fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
28041 unsafe { transmute(src:a) }
28042}
28043
28044/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
28045///
28046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
28047#[inline]
28048#[target_feature(enable = "avx512f")]
28049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28051pub const fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
28052 unsafe { transmute(src:a) }
28053}
28054
/// Copy the lower 32-bit integer in a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
    // Extract element 0 of the i32x16 view of `a`.
    unsafe { simd_extract!(a.as_i32x16(), 0) }
}
28066
/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtss_f32(a: __m512) -> f32 {
    // Extract lane 0 of `a`.
    unsafe { simd_extract!(a, 0) }
}
28077
/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
    // Extract lane 0 of `a`.
    unsafe { simd_extract!(a, 0) }
}
28088
28089/// Broadcast the low packed 32-bit integer from a to all elements of dst.
28090///
28091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
28092#[inline]
28093#[target_feature(enable = "avx512f")]
28094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28095#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
28096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28097pub const fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
28098 unsafe {
28099 let a: Simd = _mm512_castsi128_si512(a).as_i32x16();
28100 let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
28101 transmute(src:ret)
28102 }
28103}
28104
28105/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28106///
28107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
28108#[inline]
28109#[target_feature(enable = "avx512f")]
28110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28111#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28112#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28113pub const fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
28114 unsafe {
28115 let broadcast: Simd = _mm512_broadcastd_epi32(a).as_i32x16();
28116 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x16()))
28117 }
28118}
28119
28120/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28121///
28122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
28123#[inline]
28124#[target_feature(enable = "avx512f")]
28125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28126#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28127#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28128pub const fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
28129 unsafe {
28130 let broadcast: Simd = _mm512_broadcastd_epi32(a).as_i32x16();
28131 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x16::ZERO))
28132 }
28133}
28134
28135/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28136///
28137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
28138#[inline]
28139#[target_feature(enable = "avx512f,avx512vl")]
28140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28141#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28142#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28143pub const fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28144 unsafe {
28145 let broadcast: Simd = _mm256_broadcastd_epi32(a).as_i32x8();
28146 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x8()))
28147 }
28148}
28149
28150/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28151///
28152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
28153#[inline]
28154#[target_feature(enable = "avx512f,avx512vl")]
28155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28156#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28157#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28158pub const fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
28159 unsafe {
28160 let broadcast: Simd = _mm256_broadcastd_epi32(a).as_i32x8();
28161 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x8::ZERO))
28162 }
28163}
28164
28165/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28166///
28167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
28168#[inline]
28169#[target_feature(enable = "avx512f,avx512vl")]
28170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28171#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28172#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28173pub const fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
28174 unsafe {
28175 let broadcast: Simd = _mm_broadcastd_epi32(a).as_i32x4();
28176 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x4()))
28177 }
28178}
28179
28180/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28181///
28182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
28183#[inline]
28184#[target_feature(enable = "avx512f,avx512vl")]
28185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28186#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28187#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28188pub const fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
28189 unsafe {
28190 let broadcast: Simd = _mm_broadcastd_epi32(a).as_i32x4();
28191 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x4::ZERO))
28192 }
28193}
28194
/// Broadcast the low packed 64-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
    // Splat lane 0 of `a` across all 8 i64 lanes of the 512-bit result.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}
28206
28207/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28208///
28209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
28210#[inline]
28211#[target_feature(enable = "avx512f")]
28212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28213#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28214#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28215pub const fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
28216 unsafe {
28217 let broadcast: Simd = _mm512_broadcastq_epi64(a).as_i64x8();
28218 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x8()))
28219 }
28220}
28221
28222/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28223///
28224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
28225#[inline]
28226#[target_feature(enable = "avx512f")]
28227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28228#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28229#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28230pub const fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
28231 unsafe {
28232 let broadcast: Simd = _mm512_broadcastq_epi64(a).as_i64x8();
28233 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x8::ZERO))
28234 }
28235}
28236
28237/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28238///
28239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
28240#[inline]
28241#[target_feature(enable = "avx512f,avx512vl")]
28242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28243#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28245pub const fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28246 unsafe {
28247 let broadcast: Simd = _mm256_broadcastq_epi64(a).as_i64x4();
28248 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x4()))
28249 }
28250}
28251
28252/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28253///
28254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
28255#[inline]
28256#[target_feature(enable = "avx512f,avx512vl")]
28257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28258#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28260pub const fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
28261 unsafe {
28262 let broadcast: Simd = _mm256_broadcastq_epi64(a).as_i64x4();
28263 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x4::ZERO))
28264 }
28265}
28266
28267/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28268///
28269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
28270#[inline]
28271#[target_feature(enable = "avx512f,avx512vl")]
28272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28273#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28275pub const fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
28276 unsafe {
28277 let broadcast: Simd = _mm_broadcastq_epi64(a).as_i64x2();
28278 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x2()))
28279 }
28280}
28281
28282/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28283///
28284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
28285#[inline]
28286#[target_feature(enable = "avx512f,avx512vl")]
28287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28288#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28289#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28290pub const fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
28291 unsafe {
28292 let broadcast: Simd = _mm_broadcastq_epi64(a).as_i64x2();
28293 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x2::ZERO))
28294 }
28295}
28296
28297/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
28298///
28299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
28300#[inline]
28301#[target_feature(enable = "avx512f")]
28302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28303#[cfg_attr(test, assert_instr(vbroadcastss))]
28304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28305pub const fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
28306 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
28307}
28308
28309/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28310///
28311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
28312#[inline]
28313#[target_feature(enable = "avx512f")]
28314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28315#[cfg_attr(test, assert_instr(vbroadcastss))]
28316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28317pub const fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
28318 unsafe {
28319 let broadcast: Simd = _mm512_broadcastss_ps(a).as_f32x16();
28320 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x16()))
28321 }
28322}
28323
28324/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28325///
28326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
28327#[inline]
28328#[target_feature(enable = "avx512f")]
28329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28330#[cfg_attr(test, assert_instr(vbroadcastss))]
28331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28332pub const fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
28333 unsafe {
28334 let broadcast: Simd = _mm512_broadcastss_ps(a).as_f32x16();
28335 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x16::ZERO))
28336 }
28337}
28338
28339/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28340///
28341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
28342#[inline]
28343#[target_feature(enable = "avx512f,avx512vl")]
28344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28345#[cfg_attr(test, assert_instr(vbroadcastss))]
28346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28347pub const fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
28348 unsafe {
28349 let broadcast: Simd = _mm256_broadcastss_ps(a).as_f32x8();
28350 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x8()))
28351 }
28352}
28353
28354/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28355///
28356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
28357#[inline]
28358#[target_feature(enable = "avx512f,avx512vl")]
28359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28360#[cfg_attr(test, assert_instr(vbroadcastss))]
28361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28362pub const fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
28363 unsafe {
28364 let broadcast: Simd = _mm256_broadcastss_ps(a).as_f32x8();
28365 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x8::ZERO))
28366 }
28367}
28368
28369/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28370///
28371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
28372#[inline]
28373#[target_feature(enable = "avx512f,avx512vl")]
28374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28375#[cfg_attr(test, assert_instr(vbroadcastss))]
28376#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28377pub const fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
28378 unsafe {
28379 let broadcast: Simd = _mm_broadcastss_ps(a).as_f32x4();
28380 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x4()))
28381 }
28382}
28383
28384/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28385///
28386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
28387#[inline]
28388#[target_feature(enable = "avx512f,avx512vl")]
28389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28390#[cfg_attr(test, assert_instr(vbroadcastss))]
28391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28392pub const fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
28393 unsafe {
28394 let broadcast: Simd = _mm_broadcastss_ps(a).as_f32x4();
28395 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x4::ZERO))
28396 }
28397}
28398
28399/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
28400///
28401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
28402#[inline]
28403#[target_feature(enable = "avx512f")]
28404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28405#[cfg_attr(test, assert_instr(vbroadcastsd))]
28406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28407pub const fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
28408 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
28409}
28410
28411/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28412///
28413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
28414#[inline]
28415#[target_feature(enable = "avx512f")]
28416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28417#[cfg_attr(test, assert_instr(vbroadcastsd))]
28418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28419pub const fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
28420 unsafe {
28421 let broadcast: Simd = _mm512_broadcastsd_pd(a).as_f64x8();
28422 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f64x8()))
28423 }
28424}
28425
28426/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28427///
28428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
28429#[inline]
28430#[target_feature(enable = "avx512f")]
28431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28432#[cfg_attr(test, assert_instr(vbroadcastsd))]
28433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28434pub const fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
28435 unsafe {
28436 let broadcast: Simd = _mm512_broadcastsd_pd(a).as_f64x8();
28437 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f64x8::ZERO))
28438 }
28439}
28440
28441/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28442///
28443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
28444#[inline]
28445#[target_feature(enable = "avx512f,avx512vl")]
28446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28447#[cfg_attr(test, assert_instr(vbroadcastsd))]
28448#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28449pub const fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
28450 unsafe {
28451 let broadcast: Simd = _mm256_broadcastsd_pd(a).as_f64x4();
28452 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f64x4()))
28453 }
28454}
28455
28456/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28457///
28458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
28459#[inline]
28460#[target_feature(enable = "avx512f,avx512vl")]
28461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28462#[cfg_attr(test, assert_instr(vbroadcastsd))]
28463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28464pub const fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
28465 unsafe {
28466 let broadcast: Simd = _mm256_broadcastsd_pd(a).as_f64x4();
28467 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f64x4::ZERO))
28468 }
28469}
28470
28471/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
28472///
28473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
28474#[inline]
28475#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28478pub const fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
28479 unsafe {
28480 let a: Simd = a.as_i32x4();
28481 let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
28482 transmute(src:ret)
28483 }
28484}
28485
28486/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28487///
28488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
28489#[inline]
28490#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28493pub const fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
28494 unsafe {
28495 let broadcast: Simd = _mm512_broadcast_i32x4(a).as_i32x16();
28496 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x16()))
28497 }
28498}
28499
28500/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28501///
28502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
28503#[inline]
28504#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28507pub const fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
28508 unsafe {
28509 let broadcast: Simd = _mm512_broadcast_i32x4(a).as_i32x16();
28510 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x16::ZERO))
28511 }
28512}
28513
28514/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
28515///
28516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
28517#[inline]
28518#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28521pub const fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
28522 unsafe {
28523 let a: Simd = a.as_i32x4();
28524 let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
28525 transmute(src:ret)
28526 }
28527}
28528
28529/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28530///
28531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
28532#[inline]
28533#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28536pub const fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28537 unsafe {
28538 let broadcast: Simd = _mm256_broadcast_i32x4(a).as_i32x8();
28539 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x8()))
28540 }
28541}
28542
28543/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28544///
28545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
28546#[inline]
28547#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28550pub const fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
28551 unsafe {
28552 let broadcast: Simd = _mm256_broadcast_i32x4(a).as_i32x8();
28553 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x8::ZERO))
28554 }
28555}
28556
28557/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
28558///
28559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
28560#[inline]
28561#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28562#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28564pub const fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
28565 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28566}
28567
28568/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28569///
28570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
28571#[inline]
28572#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28575pub const fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
28576 unsafe {
28577 let broadcast: Simd = _mm512_broadcast_i64x4(a).as_i64x8();
28578 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x8()))
28579 }
28580}
28581
28582/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28583///
28584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
28585#[inline]
28586#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28589pub const fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
28590 unsafe {
28591 let broadcast: Simd = _mm512_broadcast_i64x4(a).as_i64x8();
28592 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x8::ZERO))
28593 }
28594}
28595
28596/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
28597///
28598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
28599#[inline]
28600#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
28601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28602#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28603pub const fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
28604 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
28605}
28606
28607/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28608///
28609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
28610#[inline]
28611#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28614pub const fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
28615 unsafe {
28616 let broadcast: Simd = _mm512_broadcast_f32x4(a).as_f32x16();
28617 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x16()))
28618 }
28619}
28620
28621/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28622///
28623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
28624#[inline]
28625#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28627#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28628pub const fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
28629 unsafe {
28630 let broadcast: Simd = _mm512_broadcast_f32x4(a).as_f32x16();
28631 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x16::ZERO))
28632 }
28633}
28634
28635/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
28636///
28637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
28638#[inline]
28639#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
28640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28642pub const fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
28643 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28644}
28645
28646/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28647///
28648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
28649#[inline]
28650#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28653pub const fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
28654 unsafe {
28655 let broadcast: Simd = _mm256_broadcast_f32x4(a).as_f32x8();
28656 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x8()))
28657 }
28658}
28659
28660/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28661///
28662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
28663#[inline]
28664#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28667pub const fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
28668 unsafe {
28669 let broadcast: Simd = _mm256_broadcast_f32x4(a).as_f32x8();
28670 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x8::ZERO))
28671 }
28672}
28673
28674/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
28675///
28676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
28677#[inline]
28678#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
28679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28681pub const fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
28682 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28683}
28684
28685/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28686///
28687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
28688#[inline]
28689#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28692pub const fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
28693 unsafe {
28694 let broadcast: Simd = _mm512_broadcast_f64x4(a).as_f64x8();
28695 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f64x8()))
28696 }
28697}
28698
28699/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28700///
28701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
28702#[inline]
28703#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28706pub const fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
28707 unsafe {
28708 let broadcast: Simd = _mm512_broadcast_f64x4(a).as_f64x8();
28709 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f64x8::ZERO))
28710 }
28711}
28712
28713/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28714///
28715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
28716#[inline]
28717#[target_feature(enable = "avx512f")]
28718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28719#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28721pub const fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28722 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i32x16(), no:a.as_i32x16())) }
28723}
28724
28725/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28726///
28727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
28728#[inline]
28729#[target_feature(enable = "avx512f,avx512vl")]
28730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28731#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28733pub const fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28734 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i32x8(), no:a.as_i32x8())) }
28735}
28736
28737/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28743#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28745pub const fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28746 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i32x4(), no:a.as_i32x4())) }
28747}
28748
28749/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28750///
28751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
28752#[inline]
28753#[target_feature(enable = "avx512f")]
28754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28755#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28757pub const fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28758 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i64x8(), no:a.as_i64x8())) }
28759}
28760
28761/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28762///
28763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
28764#[inline]
28765#[target_feature(enable = "avx512f,avx512vl")]
28766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28767#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28769pub const fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28770 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i64x4(), no:a.as_i64x4())) }
28771}
28772
28773/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28774///
28775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
28776#[inline]
28777#[target_feature(enable = "avx512f,avx512vl")]
28778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28779#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28781pub const fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28782 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i64x2(), no:a.as_i64x2())) }
28783}
28784
28785/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
28788#[inline]
28789#[target_feature(enable = "avx512f")]
28790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28791#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28793pub const fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
28794 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f32x16(), no:a.as_f32x16())) }
28795}
28796
28797/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28798///
28799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
28800#[inline]
28801#[target_feature(enable = "avx512f,avx512vl")]
28802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28803#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28805pub const fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
28806 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f32x8(), no:a.as_f32x8())) }
28807}
28808
28809/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28810///
28811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
28812#[inline]
28813#[target_feature(enable = "avx512f,avx512vl")]
28814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28815#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28817pub const fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
28818 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f32x4(), no:a.as_f32x4())) }
28819}
28820
28821/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28822///
28823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
28824#[inline]
28825#[target_feature(enable = "avx512f")]
28826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28827#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28829pub const fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
28830 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f64x8(), no:a.as_f64x8())) }
28831}
28832
28833/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28834///
28835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
28836#[inline]
28837#[target_feature(enable = "avx512f,avx512vl")]
28838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28839#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28841pub const fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
28842 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f64x4(), no:a.as_f64x4())) }
28843}
28844
28845/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28846///
28847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
28848#[inline]
28849#[target_feature(enable = "avx512f,avx512vl")]
28850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28851#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28853pub const fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
28854 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f64x2(), no:a.as_f64x2())) }
28855}
28856
28857/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
28858///
28859/// <div class="warning">Only lowest <strong>4 bits</strong> are used from the mask (shift at maximum by 60 bytes)!</div>
28860///
28861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
28862#[inline]
28863#[target_feature(enable = "avx512f")]
28864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28865#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28866#[rustc_legacy_const_generics(2)]
28867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28868pub const fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
28869 unsafe {
28870 static_assert_uimm_bits!(IMM8, 8);
28871 let a = a.as_i32x16();
28872 let b = b.as_i32x16();
28873 let imm8: i32 = IMM8 % 16;
28874 let r: i32x16 = match imm8 {
28875 0 => simd_shuffle!(
28876 a,
28877 b,
28878 [
28879 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
28880 ],
28881 ),
28882 1 => simd_shuffle!(
28883 a,
28884 b,
28885 [
28886 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
28887 ],
28888 ),
28889 2 => simd_shuffle!(
28890 a,
28891 b,
28892 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
28893 ),
28894 3 => simd_shuffle!(
28895 a,
28896 b,
28897 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
28898 ),
28899 4 => simd_shuffle!(
28900 a,
28901 b,
28902 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
28903 ),
28904 5 => simd_shuffle!(
28905 a,
28906 b,
28907 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
28908 ),
28909 6 => simd_shuffle!(
28910 a,
28911 b,
28912 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
28913 ),
28914 7 => simd_shuffle!(
28915 a,
28916 b,
28917 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
28918 ),
28919 8 => simd_shuffle!(
28920 a,
28921 b,
28922 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
28923 ),
28924 9 => simd_shuffle!(
28925 a,
28926 b,
28927 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
28928 ),
28929 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
28930 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
28931 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
28932 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
28933 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
28934 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
28935 _ => unreachable_unchecked(),
28936 };
28937 transmute(r)
28938 }
28939}
28940
28941/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28942///
28943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
28944#[inline]
28945#[target_feature(enable = "avx512f")]
28946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28947#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28948#[rustc_legacy_const_generics(4)]
28949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28950pub const fn _mm512_mask_alignr_epi32<const IMM8: i32>(
28951 src: __m512i,
28952 k: __mmask16,
28953 a: __m512i,
28954 b: __m512i,
28955) -> __m512i {
28956 unsafe {
28957 static_assert_uimm_bits!(IMM8, 8);
28958 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
28959 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
28960 }
28961}
28962
28963/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and stores the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28964///
28965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
28966#[inline]
28967#[target_feature(enable = "avx512f")]
28968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28969#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28970#[rustc_legacy_const_generics(3)]
28971#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28972pub const fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
28973 k: __mmask16,
28974 a: __m512i,
28975 b: __m512i,
28976) -> __m512i {
28977 unsafe {
28978 static_assert_uimm_bits!(IMM8, 8);
28979 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
28980 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
28981 }
28982}
28983
28984/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
28985///
28986/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 28 bytes)!</div>
28987///
28988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
28989#[inline]
28990#[target_feature(enable = "avx512f,avx512vl")]
28991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28992#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28993#[rustc_legacy_const_generics(2)]
28994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28995pub const fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28996 unsafe {
28997 static_assert_uimm_bits!(IMM8, 8);
28998 let a: Simd = a.as_i32x8();
28999 let b: Simd = b.as_i32x8();
29000 let imm8: i32 = IMM8 % 8;
29001 let r: i32x8 = match imm8 {
29002 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
29003 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
29004 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
29005 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
29006 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
29007 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
29008 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
29009 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
29010 _ => unreachable_unchecked(),
29011 };
29012 transmute(src:r)
29013 }
29014}
29015
29016/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29017///
29018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
29019#[inline]
29020#[target_feature(enable = "avx512f,avx512vl")]
29021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29022#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29023#[rustc_legacy_const_generics(4)]
29024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29025pub const fn _mm256_mask_alignr_epi32<const IMM8: i32>(
29026 src: __m256i,
29027 k: __mmask8,
29028 a: __m256i,
29029 b: __m256i,
29030) -> __m256i {
29031 unsafe {
29032 static_assert_uimm_bits!(IMM8, 8);
29033 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
29034 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
29035 }
29036}
29037
29038/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29039///
29040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
29041#[inline]
29042#[target_feature(enable = "avx512f,avx512vl")]
29043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29044#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29045#[rustc_legacy_const_generics(3)]
29046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29047pub const fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
29048 k: __mmask8,
29049 a: __m256i,
29050 b: __m256i,
29051) -> __m256i {
29052 unsafe {
29053 static_assert_uimm_bits!(IMM8, 8);
29054 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
29055 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
29056 }
29057}
29058
29059/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
29060///
29061/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 12 bytes)!</div>
29062///
29063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
29064#[inline]
29065#[target_feature(enable = "avx512f,avx512vl")]
29066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29067#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
29068#[rustc_legacy_const_generics(2)]
29069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29070pub const fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
29071 unsafe {
29072 static_assert_uimm_bits!(IMM8, 8);
29073 let a: Simd = a.as_i32x4();
29074 let b: Simd = b.as_i32x4();
29075 let imm8: i32 = IMM8 % 4;
29076 let r: i32x4 = match imm8 {
29077 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
29078 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
29079 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
29080 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
29081 _ => unreachable_unchecked(),
29082 };
29083 transmute(src:r)
29084 }
29085}
29086
29087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29088///
29089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
29090#[inline]
29091#[target_feature(enable = "avx512f,avx512vl")]
29092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29093#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29094#[rustc_legacy_const_generics(4)]
29095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29096pub const fn _mm_mask_alignr_epi32<const IMM8: i32>(
29097 src: __m128i,
29098 k: __mmask8,
29099 a: __m128i,
29100 b: __m128i,
29101) -> __m128i {
29102 unsafe {
29103 static_assert_uimm_bits!(IMM8, 8);
29104 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
29105 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
29106 }
29107}
29108
29109/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29110///
29111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
29112#[inline]
29113#[target_feature(enable = "avx512f,avx512vl")]
29114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29115#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29116#[rustc_legacy_const_generics(3)]
29117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29118pub const fn _mm_maskz_alignr_epi32<const IMM8: i32>(
29119 k: __mmask8,
29120 a: __m128i,
29121 b: __m128i,
29122) -> __m128i {
29123 unsafe {
29124 static_assert_uimm_bits!(IMM8, 8);
29125 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
29126 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
29127 }
29128}
29129
29130/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
29131///
29132/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 56 bytes)!</div>
29133///
29134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
29135#[inline]
29136#[target_feature(enable = "avx512f")]
29137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29138#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29139#[rustc_legacy_const_generics(2)]
29140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29141pub const fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
29142 unsafe {
29143 static_assert_uimm_bits!(IMM8, 8);
29144 let imm8: i32 = IMM8 % 8;
29145 let r: i64x8 = match imm8 {
29146 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
29147 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
29148 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
29149 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
29150 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
29151 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
29152 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
29153 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
29154 _ => unreachable_unchecked(),
29155 };
29156 transmute(src:r)
29157 }
29158}
29159
29160/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29161///
29162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
29163#[inline]
29164#[target_feature(enable = "avx512f")]
29165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29166#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29167#[rustc_legacy_const_generics(4)]
29168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29169pub const fn _mm512_mask_alignr_epi64<const IMM8: i32>(
29170 src: __m512i,
29171 k: __mmask8,
29172 a: __m512i,
29173 b: __m512i,
29174) -> __m512i {
29175 unsafe {
29176 static_assert_uimm_bits!(IMM8, 8);
29177 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
29178 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
29179 }
29180}
29181
29182/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and stores the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29183///
29184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
29185#[inline]
29186#[target_feature(enable = "avx512f")]
29187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29188#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29189#[rustc_legacy_const_generics(3)]
29190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29191pub const fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
29192 k: __mmask8,
29193 a: __m512i,
29194 b: __m512i,
29195) -> __m512i {
29196 unsafe {
29197 static_assert_uimm_bits!(IMM8, 8);
29198 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
29199 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
29200 }
29201}
29202
29203/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
29204///
29205/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 24 bytes)!</div>
29206///
29207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
29208#[inline]
29209#[target_feature(enable = "avx512f,avx512vl")]
29210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29211#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29212#[rustc_legacy_const_generics(2)]
29213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29214pub const fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
29215 unsafe {
29216 static_assert_uimm_bits!(IMM8, 8);
29217 let imm8: i32 = IMM8 % 4;
29218 let r: i64x4 = match imm8 {
29219 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
29220 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
29221 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
29222 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
29223 _ => unreachable_unchecked(),
29224 };
29225 transmute(src:r)
29226 }
29227}
29228
29229/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29230///
29231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
29232#[inline]
29233#[target_feature(enable = "avx512f,avx512vl")]
29234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29235#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29236#[rustc_legacy_const_generics(4)]
29237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29238pub const fn _mm256_mask_alignr_epi64<const IMM8: i32>(
29239 src: __m256i,
29240 k: __mmask8,
29241 a: __m256i,
29242 b: __m256i,
29243) -> __m256i {
29244 unsafe {
29245 static_assert_uimm_bits!(IMM8, 8);
29246 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
29247 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
29248 }
29249}
29250
29251/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
29254#[inline]
29255#[target_feature(enable = "avx512f,avx512vl")]
29256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29257#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29258#[rustc_legacy_const_generics(3)]
29259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29260pub const fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
29261 k: __mmask8,
29262 a: __m256i,
29263 b: __m256i,
29264) -> __m256i {
29265 unsafe {
29266 static_assert_uimm_bits!(IMM8, 8);
29267 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
29268 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
29269 }
29270}
29271
29272/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
29273///
29274/// <div class="warning">Only lowest <strong>bit</strong> is used from the mask (shift at maximum by 8 bytes)!</div>
29275///
29276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
29277#[inline]
29278#[target_feature(enable = "avx512f,avx512vl")]
29279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29280#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
29281#[rustc_legacy_const_generics(2)]
29282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29283pub const fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
29284 unsafe {
29285 static_assert_uimm_bits!(IMM8, 8);
29286 let imm8: i32 = IMM8 % 2;
29287 let r: i64x2 = match imm8 {
29288 0 => simd_shuffle!(a, b, [2, 3]),
29289 1 => simd_shuffle!(a, b, [3, 0]),
29290 _ => unreachable_unchecked(),
29291 };
29292 transmute(src:r)
29293 }
29294}
29295
29296/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29297///
29298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
29299#[inline]
29300#[target_feature(enable = "avx512f,avx512vl")]
29301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29302#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29303#[rustc_legacy_const_generics(4)]
29304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29305pub const fn _mm_mask_alignr_epi64<const IMM8: i32>(
29306 src: __m128i,
29307 k: __mmask8,
29308 a: __m128i,
29309 b: __m128i,
29310) -> __m128i {
29311 unsafe {
29312 static_assert_uimm_bits!(IMM8, 8);
29313 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
29314 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x2(), no:src.as_i64x2()))
29315 }
29316}
29317
29318/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
29321#[inline]
29322#[target_feature(enable = "avx512f,avx512vl")]
29323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29324#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29325#[rustc_legacy_const_generics(3)]
29326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29327pub const fn _mm_maskz_alignr_epi64<const IMM8: i32>(
29328 k: __mmask8,
29329 a: __m128i,
29330 b: __m128i,
29331) -> __m128i {
29332 unsafe {
29333 static_assert_uimm_bits!(IMM8, 8);
29334 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
29335 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x2(), no:i64x2::ZERO))
29336 }
29337}
29338
29339/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
29340///
29341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
29342#[inline]
29343#[target_feature(enable = "avx512f")]
29344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29345#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
29346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29347pub const fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
29348 unsafe { transmute(src:simd_and(x:a.as_i32x16(), y:b.as_i32x16())) }
29349}
29350
29351/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29357#[cfg_attr(test, assert_instr(vpandd))]
29358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29359pub const fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29360 unsafe {
29361 let and: Simd = _mm512_and_epi32(a, b).as_i32x16();
29362 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i32x16()))
29363 }
29364}
29365
29366/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29367///
29368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
29369#[inline]
29370#[target_feature(enable = "avx512f")]
29371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29372#[cfg_attr(test, assert_instr(vpandd))]
29373#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29374pub const fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29375 unsafe {
29376 let and: Simd = _mm512_and_epi32(a, b).as_i32x16();
29377 transmute(src:simd_select_bitmask(m:k, yes:and, no:i32x16::ZERO))
29378 }
29379}
29380
29381/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29382///
29383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
29384#[inline]
29385#[target_feature(enable = "avx512f,avx512vl")]
29386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29387#[cfg_attr(test, assert_instr(vpandd))]
29388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29389pub const fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29390 unsafe {
29391 let and: Simd = simd_and(x:a.as_i32x8(), y:b.as_i32x8());
29392 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i32x8()))
29393 }
29394}
29395
29396/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
29399#[inline]
29400#[target_feature(enable = "avx512f,avx512vl")]
29401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29402#[cfg_attr(test, assert_instr(vpandd))]
29403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29404pub const fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29405 unsafe {
29406 let and: Simd = simd_and(x:a.as_i32x8(), y:b.as_i32x8());
29407 transmute(src:simd_select_bitmask(m:k, yes:and, no:i32x8::ZERO))
29408 }
29409}
29410
29411/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29412///
29413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
29414#[inline]
29415#[target_feature(enable = "avx512f,avx512vl")]
29416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29417#[cfg_attr(test, assert_instr(vpandd))]
29418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29419pub const fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29420 unsafe {
29421 let and: Simd = simd_and(x:a.as_i32x4(), y:b.as_i32x4());
29422 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i32x4()))
29423 }
29424}
29425
29426/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29427///
29428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
29429#[inline]
29430#[target_feature(enable = "avx512f,avx512vl")]
29431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29432#[cfg_attr(test, assert_instr(vpandd))]
29433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29434pub const fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29435 unsafe {
29436 let and: Simd = simd_and(x:a.as_i32x4(), y:b.as_i32x4());
29437 transmute(src:simd_select_bitmask(m:k, yes:and, no:i32x4::ZERO))
29438 }
29439}
29440
29441/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
29442///
29443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
29444#[inline]
29445#[target_feature(enable = "avx512f")]
29446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29447#[cfg_attr(test, assert_instr(vpandq))]
29448#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29449pub const fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
29450 unsafe { transmute(src:simd_and(x:a.as_i64x8(), y:b.as_i64x8())) }
29451}
29452
29453/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29454///
29455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
29456#[inline]
29457#[target_feature(enable = "avx512f")]
29458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29459#[cfg_attr(test, assert_instr(vpandq))]
29460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29461pub const fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29462 unsafe {
29463 let and: Simd = _mm512_and_epi64(a, b).as_i64x8();
29464 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i64x8()))
29465 }
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
29471#[inline]
29472#[target_feature(enable = "avx512f")]
29473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29474#[cfg_attr(test, assert_instr(vpandq))]
29475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29476pub const fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29477 unsafe {
29478 let and: Simd = _mm512_and_epi64(a, b).as_i64x8();
29479 transmute(src:simd_select_bitmask(m:k, yes:and, no:i64x8::ZERO))
29480 }
29481}
29482
29483/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29484///
29485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
29486#[inline]
29487#[target_feature(enable = "avx512f,avx512vl")]
29488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29489#[cfg_attr(test, assert_instr(vpandq))]
29490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29491pub const fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29492 unsafe {
29493 let and: Simd = simd_and(x:a.as_i64x4(), y:b.as_i64x4());
29494 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i64x4()))
29495 }
29496}
29497
29498/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29499///
29500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
29501#[inline]
29502#[target_feature(enable = "avx512f,avx512vl")]
29503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29504#[cfg_attr(test, assert_instr(vpandq))]
29505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29506pub const fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29507 unsafe {
29508 let and: Simd = simd_and(x:a.as_i64x4(), y:b.as_i64x4());
29509 transmute(src:simd_select_bitmask(m:k, yes:and, no:i64x4::ZERO))
29510 }
29511}
29512
29513/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29519#[cfg_attr(test, assert_instr(vpandq))]
29520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29521pub const fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29522 unsafe {
29523 let and: Simd = simd_and(x:a.as_i64x2(), y:b.as_i64x2());
29524 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i64x2()))
29525 }
29526}
29527
29528/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29529///
29530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
29531#[inline]
29532#[target_feature(enable = "avx512f,avx512vl")]
29533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29534#[cfg_attr(test, assert_instr(vpandq))]
29535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29536pub const fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29537 unsafe {
29538 let and: Simd = simd_and(x:a.as_i64x2(), y:b.as_i64x2());
29539 transmute(src:simd_select_bitmask(m:k, yes:and, no:i64x2::ZERO))
29540 }
29541}
29542
29543/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
29544///
29545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
29546#[inline]
29547#[target_feature(enable = "avx512f")]
29548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29549#[cfg_attr(test, assert_instr(vpandq))]
29550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29551pub const fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
29552 unsafe { transmute(src:simd_and(x:a.as_i32x16(), y:b.as_i32x16())) }
29553}
29554
29555/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29556///
29557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
29558#[inline]
29559#[target_feature(enable = "avx512f")]
29560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29561#[cfg_attr(test, assert_instr(vporq))]
29562#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29563pub const fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
29564 unsafe { transmute(src:simd_or(x:a.as_i32x16(), y:b.as_i32x16())) }
29565}
29566
29567/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29568///
29569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
29570#[inline]
29571#[target_feature(enable = "avx512f")]
29572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29573#[cfg_attr(test, assert_instr(vpord))]
29574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29575pub const fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29576 unsafe {
29577 let or: Simd = _mm512_or_epi32(a, b).as_i32x16();
29578 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i32x16()))
29579 }
29580}
29581
29582/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29583///
29584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
29585#[inline]
29586#[target_feature(enable = "avx512f")]
29587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29588#[cfg_attr(test, assert_instr(vpord))]
29589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29590pub const fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29591 unsafe {
29592 let or: Simd = _mm512_or_epi32(a, b).as_i32x16();
29593 transmute(src:simd_select_bitmask(m:k, yes:or, no:i32x16::ZERO))
29594 }
29595}
29596
29597/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29598///
29599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
29600#[inline]
29601#[target_feature(enable = "avx512f,avx512vl")]
29602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29603#[cfg_attr(test, assert_instr(vor))] //should be vpord
29604#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29605pub const fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
29606 unsafe { transmute(src:simd_or(x:a.as_i32x8(), y:b.as_i32x8())) }
29607}
29608
29609/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29610///
29611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
29612#[inline]
29613#[target_feature(enable = "avx512f,avx512vl")]
29614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29615#[cfg_attr(test, assert_instr(vpord))]
29616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29617pub const fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29618 unsafe {
29619 let or: Simd = _mm256_or_epi32(a, b).as_i32x8();
29620 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i32x8()))
29621 }
29622}
29623
29624/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29630#[cfg_attr(test, assert_instr(vpord))]
29631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29632pub const fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29633 unsafe {
29634 let or: Simd = _mm256_or_epi32(a, b).as_i32x8();
29635 transmute(src:simd_select_bitmask(m:k, yes:or, no:i32x8::ZERO))
29636 }
29637}
29638
29639/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29640///
29641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
29642#[inline]
29643#[target_feature(enable = "avx512f,avx512vl")]
29644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29645#[cfg_attr(test, assert_instr(vor))] //should be vpord
29646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29647pub const fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
29648 unsafe { transmute(src:simd_or(x:a.as_i32x4(), y:b.as_i32x4())) }
29649}
29650
29651/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29652///
29653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
29654#[inline]
29655#[target_feature(enable = "avx512f,avx512vl")]
29656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29657#[cfg_attr(test, assert_instr(vpord))]
29658#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29659pub const fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29660 unsafe {
29661 let or: Simd = _mm_or_epi32(a, b).as_i32x4();
29662 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i32x4()))
29663 }
29664}
29665
29666/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29667///
29668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
29669#[inline]
29670#[target_feature(enable = "avx512f,avx512vl")]
29671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29672#[cfg_attr(test, assert_instr(vpord))]
29673#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29674pub const fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29675 unsafe {
29676 let or: Simd = _mm_or_epi32(a, b).as_i32x4();
29677 transmute(src:simd_select_bitmask(m:k, yes:or, no:i32x4::ZERO))
29678 }
29679}
29680
29681/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst.
29682///
29683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
29684#[inline]
29685#[target_feature(enable = "avx512f")]
29686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29687#[cfg_attr(test, assert_instr(vporq))]
29688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29689pub const fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
29690 unsafe { transmute(src:simd_or(x:a.as_i64x8(), y:b.as_i64x8())) }
29691}
29692
29693/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29694///
29695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
29696#[inline]
29697#[target_feature(enable = "avx512f")]
29698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29699#[cfg_attr(test, assert_instr(vporq))]
29700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29701pub const fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29702 unsafe {
29703 let or: Simd = _mm512_or_epi64(a, b).as_i64x8();
29704 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i64x8()))
29705 }
29706}
29707
29708/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29709///
29710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
29711#[inline]
29712#[target_feature(enable = "avx512f")]
29713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29714#[cfg_attr(test, assert_instr(vporq))]
29715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29716pub const fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29717 unsafe {
29718 let or: Simd = _mm512_or_epi64(a, b).as_i64x8();
29719 transmute(src:simd_select_bitmask(m:k, yes:or, no:i64x8::ZERO))
29720 }
29721}
29722
29723/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst.
29724///
29725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
29726#[inline]
29727#[target_feature(enable = "avx512f,avx512vl")]
29728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29729#[cfg_attr(test, assert_instr(vor))] //should be vporq
29730#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29731pub const fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
29732 unsafe { transmute(src:simd_or(x:a.as_i64x4(), y:b.as_i64x4())) }
29733}
29734
29735/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29736///
29737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
29738#[inline]
29739#[target_feature(enable = "avx512f,avx512vl")]
29740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29741#[cfg_attr(test, assert_instr(vporq))]
29742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29743pub const fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29744 unsafe {
29745 let or: Simd = _mm256_or_epi64(a, b).as_i64x4();
29746 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i64x4()))
29747 }
29748}
29749
29750/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29751///
29752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
29753#[inline]
29754#[target_feature(enable = "avx512f,avx512vl")]
29755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29756#[cfg_attr(test, assert_instr(vporq))]
29757#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29758pub const fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29759 unsafe {
29760 let or: Simd = _mm256_or_epi64(a, b).as_i64x4();
29761 transmute(src:simd_select_bitmask(m:k, yes:or, no:i64x4::ZERO))
29762 }
29763}
29764
29765/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst.
29766///
29767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
29768#[inline]
29769#[target_feature(enable = "avx512f,avx512vl")]
29770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29771#[cfg_attr(test, assert_instr(vor))] //should be vporq
29772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29773pub const fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
29774 unsafe { transmute(src:simd_or(x:a.as_i64x2(), y:b.as_i64x2())) }
29775}
29776
29777/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29778///
29779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
29780#[inline]
29781#[target_feature(enable = "avx512f,avx512vl")]
29782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29783#[cfg_attr(test, assert_instr(vporq))]
29784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29785pub const fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29786 unsafe {
29787 let or: Simd = _mm_or_epi64(a, b).as_i64x2();
29788 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i64x2()))
29789 }
29790}
29791
29792/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29793///
29794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
29795#[inline]
29796#[target_feature(enable = "avx512f,avx512vl")]
29797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29798#[cfg_attr(test, assert_instr(vporq))]
29799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29800pub const fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29801 unsafe {
29802 let or: Simd = _mm_or_epi64(a, b).as_i64x2();
29803 transmute(src:simd_select_bitmask(m:k, yes:or, no:i64x2::ZERO))
29804 }
29805}
29806
29807/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
29808///
29809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
29810#[inline]
29811#[target_feature(enable = "avx512f")]
29812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29813#[cfg_attr(test, assert_instr(vporq))]
29814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29815pub const fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
29816 unsafe { transmute(src:simd_or(x:a.as_i32x16(), y:b.as_i32x16())) }
29817}
29818
29819/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29820///
29821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29825#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
29826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29827pub const fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
29828 unsafe { transmute(src:simd_xor(x:a.as_i32x16(), y:b.as_i32x16())) }
29829}
29830
29831/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29832///
29833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
29834#[inline]
29835#[target_feature(enable = "avx512f")]
29836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29837#[cfg_attr(test, assert_instr(vpxord))]
29838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29839pub const fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29840 unsafe {
29841 let xor: Simd = _mm512_xor_epi32(a, b).as_i32x16();
29842 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i32x16()))
29843 }
29844}
29845
29846/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29847///
29848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
29849#[inline]
29850#[target_feature(enable = "avx512f")]
29851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29852#[cfg_attr(test, assert_instr(vpxord))]
29853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29854pub const fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29855 unsafe {
29856 let xor: Simd = _mm512_xor_epi32(a, b).as_i32x16();
29857 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i32x16::ZERO))
29858 }
29859}
29860
29861/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29862///
29863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
29864#[inline]
29865#[target_feature(enable = "avx512f,avx512vl")]
29866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29867#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29869pub const fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
29870 unsafe { transmute(src:simd_xor(x:a.as_i32x8(), y:b.as_i32x8())) }
29871}
29872
29873/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29874///
29875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
29876#[inline]
29877#[target_feature(enable = "avx512f,avx512vl")]
29878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29879#[cfg_attr(test, assert_instr(vpxord))]
29880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29881pub const fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29882 unsafe {
29883 let xor: Simd = _mm256_xor_epi32(a, b).as_i32x8();
29884 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i32x8()))
29885 }
29886}
29887
29888/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
29891#[inline]
29892#[target_feature(enable = "avx512f,avx512vl")]
29893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29894#[cfg_attr(test, assert_instr(vpxord))]
29895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29896pub const fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29897 unsafe {
29898 let xor: Simd = _mm256_xor_epi32(a, b).as_i32x8();
29899 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i32x8::ZERO))
29900 }
29901}
29902
29903/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29904///
29905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
29906#[inline]
29907#[target_feature(enable = "avx512f,avx512vl")]
29908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29909#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29910#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29911pub const fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
29912 unsafe { transmute(src:simd_xor(x:a.as_i32x4(), y:b.as_i32x4())) }
29913}
29914
29915/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29916///
29917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
29918#[inline]
29919#[target_feature(enable = "avx512f,avx512vl")]
29920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29921#[cfg_attr(test, assert_instr(vpxord))]
29922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29923pub const fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29924 unsafe {
29925 let xor: Simd = _mm_xor_epi32(a, b).as_i32x4();
29926 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i32x4()))
29927 }
29928}
29929
29930/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29936#[cfg_attr(test, assert_instr(vpxord))]
29937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29938pub const fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29939 unsafe {
29940 let xor: Simd = _mm_xor_epi32(a, b).as_i32x4();
29941 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i32x4::ZERO))
29942 }
29943}
29944
29945/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29946///
29947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
29948#[inline]
29949#[target_feature(enable = "avx512f")]
29950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29951#[cfg_attr(test, assert_instr(vpxorq))]
29952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29953pub const fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
29954 unsafe { transmute(src:simd_xor(x:a.as_i64x8(), y:b.as_i64x8())) }
29955}
29956
29957/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29958///
29959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
29960#[inline]
29961#[target_feature(enable = "avx512f")]
29962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29963#[cfg_attr(test, assert_instr(vpxorq))]
29964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29965pub const fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29966 unsafe {
29967 let xor: Simd = _mm512_xor_epi64(a, b).as_i64x8();
29968 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i64x8()))
29969 }
29970}
29971
29972/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29978#[cfg_attr(test, assert_instr(vpxorq))]
29979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29980pub const fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29981 unsafe {
29982 let xor: Simd = _mm512_xor_epi64(a, b).as_i64x8();
29983 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i64x8::ZERO))
29984 }
29985}
29986
29987/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29988///
29989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
29990#[inline]
29991#[target_feature(enable = "avx512f,avx512vl")]
29992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29993#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
29994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29995pub const fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
29996 unsafe { transmute(src:simd_xor(x:a.as_i64x4(), y:b.as_i64x4())) }
29997}
29998
29999/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30000///
30001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
30002#[inline]
30003#[target_feature(enable = "avx512f,avx512vl")]
30004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30005#[cfg_attr(test, assert_instr(vpxorq))]
30006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30007pub const fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30008 unsafe {
30009 let xor: Simd = _mm256_xor_epi64(a, b).as_i64x4();
30010 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i64x4()))
30011 }
30012}
30013
30014/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30015///
30016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
30017#[inline]
30018#[target_feature(enable = "avx512f,avx512vl")]
30019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30020#[cfg_attr(test, assert_instr(vpxorq))]
30021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30022pub const fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30023 unsafe {
30024 let xor: Simd = _mm256_xor_epi64(a, b).as_i64x4();
30025 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i64x4::ZERO))
30026 }
30027}
30028
30029/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
30030///
30031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
30032#[inline]
30033#[target_feature(enable = "avx512f,avx512vl")]
30034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30035#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
30036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30037pub const fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
30038 unsafe { transmute(src:simd_xor(x:a.as_i64x2(), y:b.as_i64x2())) }
30039}
30040
30041/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30042///
30043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
30044#[inline]
30045#[target_feature(enable = "avx512f,avx512vl")]
30046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30047#[cfg_attr(test, assert_instr(vpxorq))]
30048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30049pub const fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30050 unsafe {
30051 let xor: Simd = _mm_xor_epi64(a, b).as_i64x2();
30052 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i64x2()))
30053 }
30054}
30055
30056/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
30059#[inline]
30060#[target_feature(enable = "avx512f,avx512vl")]
30061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30062#[cfg_attr(test, assert_instr(vpxorq))]
30063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30064pub const fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30065 unsafe {
30066 let xor: Simd = _mm_xor_epi64(a, b).as_i64x2();
30067 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i64x2::ZERO))
30068 }
30069}
30070
30071/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
30072///
30073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
30074#[inline]
30075#[target_feature(enable = "avx512f")]
30076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30077#[cfg_attr(test, assert_instr(vpxorq))]
30078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30079pub const fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
30080 unsafe { transmute(src:simd_xor(x:a.as_i32x16(), y:b.as_i32x16())) }
30081}
30082
30083/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
30084///
30085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
30086#[inline]
30087#[target_feature(enable = "avx512f")]
30088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30089#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
30090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30091pub const fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
30092 _mm512_and_epi32(a:_mm512_xor_epi32(a, b:_mm512_set1_epi32(u32::MAX as i32)), b)
30093}
30094
30095/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30096///
30097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
30098#[inline]
30099#[target_feature(enable = "avx512f")]
30100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30101#[cfg_attr(test, assert_instr(vpandnd))]
30102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30103pub const fn _mm512_mask_andnot_epi32(
30104 src: __m512i,
30105 k: __mmask16,
30106 a: __m512i,
30107 b: __m512i,
30108) -> __m512i {
30109 unsafe {
30110 let andnot: Simd = _mm512_andnot_epi32(a, b).as_i32x16();
30111 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i32x16()))
30112 }
30113}
30114
30115/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30116///
30117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
30118#[inline]
30119#[target_feature(enable = "avx512f")]
30120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30121#[cfg_attr(test, assert_instr(vpandnd))]
30122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30123pub const fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
30124 unsafe {
30125 let andnot: Simd = _mm512_andnot_epi32(a, b).as_i32x16();
30126 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i32x16::ZERO))
30127 }
30128}
30129
30130/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
30133#[inline]
30134#[target_feature(enable = "avx512f,avx512vl")]
30135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30136#[cfg_attr(test, assert_instr(vpandnd))]
30137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30138pub const fn _mm256_mask_andnot_epi32(
30139 src: __m256i,
30140 k: __mmask8,
30141 a: __m256i,
30142 b: __m256i,
30143) -> __m256i {
30144 unsafe {
30145 let not: __m256i = _mm256_xor_epi32(a, b:_mm256_set1_epi32(u32::MAX as i32));
30146 let andnot: Simd = simd_and(x:not.as_i32x8(), y:b.as_i32x8());
30147 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i32x8()))
30148 }
30149}
30150
30151/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30152///
30153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
30154#[inline]
30155#[target_feature(enable = "avx512f,avx512vl")]
30156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30157#[cfg_attr(test, assert_instr(vpandnd))]
30158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30159pub const fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30160 unsafe {
30161 let not: __m256i = _mm256_xor_epi32(a, b:_mm256_set1_epi32(u32::MAX as i32));
30162 let andnot: Simd = simd_and(x:not.as_i32x8(), y:b.as_i32x8());
30163 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i32x8::ZERO))
30164 }
30165}
30166
30167/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30168///
30169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
30170#[inline]
30171#[target_feature(enable = "avx512f,avx512vl")]
30172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30173#[cfg_attr(test, assert_instr(vpandnd))]
30174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30175pub const fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30176 unsafe {
30177 let not: __m128i = _mm_xor_epi32(a, b:_mm_set1_epi32(u32::MAX as i32));
30178 let andnot: Simd = simd_and(x:not.as_i32x4(), y:b.as_i32x4());
30179 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i32x4()))
30180 }
30181}
30182
30183/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30184///
30185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
30186#[inline]
30187#[target_feature(enable = "avx512f,avx512vl")]
30188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30189#[cfg_attr(test, assert_instr(vpandnd))]
30190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30191pub const fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30192 unsafe {
30193 let not: __m128i = _mm_xor_epi32(a, b:_mm_set1_epi32(u32::MAX as i32));
30194 let andnot: Simd = simd_and(x:not.as_i32x4(), y:b.as_i32x4());
30195 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i32x4::ZERO))
30196 }
30197}
30198
30199/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
30200///
30201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
30202#[inline]
30203#[target_feature(enable = "avx512f")]
30204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30205#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
30206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30207pub const fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
30208 _mm512_and_epi64(a:_mm512_xor_epi64(a, b:_mm512_set1_epi64(u64::MAX as i64)), b)
30209}
30210
30211/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30212///
30213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
30214#[inline]
30215#[target_feature(enable = "avx512f")]
30216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30217#[cfg_attr(test, assert_instr(vpandnq))]
30218#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30219pub const fn _mm512_mask_andnot_epi64(
30220 src: __m512i,
30221 k: __mmask8,
30222 a: __m512i,
30223 b: __m512i,
30224) -> __m512i {
30225 unsafe {
30226 let andnot: Simd = _mm512_andnot_epi64(a, b).as_i64x8();
30227 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i64x8()))
30228 }
30229}
30230
30231/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30232///
30233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
30234#[inline]
30235#[target_feature(enable = "avx512f")]
30236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30237#[cfg_attr(test, assert_instr(vpandnq))]
30238#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30239pub const fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
30240 unsafe {
30241 let andnot: Simd = _mm512_andnot_epi64(a, b).as_i64x8();
30242 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i64x8::ZERO))
30243 }
30244}
30245
30246/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30247///
30248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
30249#[inline]
30250#[target_feature(enable = "avx512f,avx512vl")]
30251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30252#[cfg_attr(test, assert_instr(vpandnq))]
30253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30254pub const fn _mm256_mask_andnot_epi64(
30255 src: __m256i,
30256 k: __mmask8,
30257 a: __m256i,
30258 b: __m256i,
30259) -> __m256i {
30260 unsafe {
30261 let not: __m256i = _mm256_xor_epi64(a, b:_mm256_set1_epi64x(u64::MAX as i64));
30262 let andnot: Simd = simd_and(x:not.as_i64x4(), y:b.as_i64x4());
30263 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i64x4()))
30264 }
30265}
30266
30267/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30268///
30269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
30270#[inline]
30271#[target_feature(enable = "avx512f,avx512vl")]
30272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30273#[cfg_attr(test, assert_instr(vpandnq))]
30274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30275pub const fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30276 unsafe {
30277 let not: __m256i = _mm256_xor_epi64(a, b:_mm256_set1_epi64x(u64::MAX as i64));
30278 let andnot: Simd = simd_and(x:not.as_i64x4(), y:b.as_i64x4());
30279 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i64x4::ZERO))
30280 }
30281}
30282
30283/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30284///
30285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
30286#[inline]
30287#[target_feature(enable = "avx512f,avx512vl")]
30288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30289#[cfg_attr(test, assert_instr(vpandnq))]
30290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30291pub const fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30292 unsafe {
30293 let not: __m128i = _mm_xor_epi64(a, b:_mm_set1_epi64x(u64::MAX as i64));
30294 let andnot: Simd = simd_and(x:not.as_i64x2(), y:b.as_i64x2());
30295 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i64x2()))
30296 }
30297}
30298
30299/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30300///
30301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
30302#[inline]
30303#[target_feature(enable = "avx512f,avx512vl")]
30304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30305#[cfg_attr(test, assert_instr(vpandnq))]
30306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30307pub const fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30308 unsafe {
30309 let not: __m128i = _mm_xor_epi64(a, b:_mm_set1_epi64x(u64::MAX as i64));
30310 let andnot: Simd = simd_and(x:not.as_i64x2(), y:b.as_i64x2());
30311 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i64x2::ZERO))
30312 }
30313}
30314
30315/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
30316///
30317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
30318#[inline]
30319#[target_feature(enable = "avx512f")]
30320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30321#[cfg_attr(test, assert_instr(vpandnq))]
30322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30323pub const fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
30324 _mm512_and_epi64(a:_mm512_xor_epi64(a, b:_mm512_set1_epi64(u64::MAX as i64)), b)
30325}
30326
/// Convert 16-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask16_u32(a: __mmask16) -> u32 {
    // `__mmask16` is a plain 16-bit integer; widening to u32 is a lossless zero-extension.
    a as u32
}
30337
/// Convert 32-bit integer value a to an 16-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask16(a: u32) -> __mmask16 {
    // Truncating cast: only the low 16 bits of `a` are kept, matching kmovw semantics.
    a as __mmask16
}
30348
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // `__mmask16` is an ordinary u16, so scalar `&` implements the kandw semantics.
    a & b
}
30360
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Legacy alias of `_kand_mask16`: scalar `&` on the u16 mask representation.
    a & b
}
30372
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Scalar `|` on the u16 mask representation implements the korw semantics.
    a | b
}
30384
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Legacy alias of `_kor_mask16`: scalar `|` on the u16 mask representation.
    a | b
}
30396
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Scalar `^` on the u16 mask representation implements the kxorw semantics.
    a ^ b
}
30408
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Legacy alias of `_kxor_mask16`: scalar `^` on the u16 mask representation.
    a ^ b
}
30420
30421/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30428pub const fn _knot_mask16(a: __mmask16) -> __mmask16 {
30429 a ^ 0b11111111_11111111
30430}
30431
30432/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30439pub const fn _mm512_knot(a: __mmask16) -> __mmask16 {
30440 a ^ 0b11111111_11111111
30441}
30442
30443/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
30446#[inline]
30447#[target_feature(enable = "avx512f")]
30448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30449#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
30450#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30451pub const fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
30452 _mm512_kand(a:_mm512_knot(a), b)
30453}
30454
30455/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
30456///
30457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
30458#[inline]
30459#[target_feature(enable = "avx512f")]
30460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30461#[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandw
30462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30463pub const fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
30464 _mm512_kand(a:_mm512_knot(a), b)
30465}
30466
30467/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
30468///
30469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
30470#[inline]
30471#[target_feature(enable = "avx512f")]
30472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30473#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
30474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30475pub const fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
30476 _mm512_knot(_mm512_kxor(a, b))
30477}
30478
30479/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
30480///
30481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
30482#[inline]
30483#[target_feature(enable = "avx512f")]
30484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30485#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kandw
30486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30487pub const fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
30488 _mm512_knot(_mm512_kxor(a, b))
30489}
30490
30491/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30492/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
30493///
30494/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
30495#[inline]
30496#[target_feature(enable = "avx512f")]
30497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30499pub const unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
30500 let tmp: u16 = _kor_mask16(a, b);
30501 *all_ones = (tmp == 0xffff) as u8;
30502 (tmp == 0) as u8
30503}
30504
30505/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
30506/// store 0 in dst.
30507///
30508/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
30509#[inline]
30510#[target_feature(enable = "avx512f")]
30511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30513pub const fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30514 (_kor_mask16(a, b) == 0xffff) as u8
30515}
30516
30517/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30518/// store 0 in dst.
30519///
30520/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
30521#[inline]
30522#[target_feature(enable = "avx512f")]
30523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30525pub const fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30526 (_kor_mask16(a, b) == 0) as u8
30527}
30528
30529/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
30530///
30531/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
30532#[inline]
30533#[target_feature(enable = "avx512f")]
30534#[rustc_legacy_const_generics(1)]
30535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30537pub const fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
30538 a.unbounded_shl(COUNT)
30539}
30540
30541/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
30542///
30543/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
30544#[inline]
30545#[target_feature(enable = "avx512f")]
30546#[rustc_legacy_const_generics(1)]
30547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30549pub const fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
30550 a.unbounded_shr(COUNT)
30551}
30552
/// Load 16-bit mask from memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
    // Caller must guarantee `mem_addr` is valid for a properly aligned 2-byte read.
    *mem_addr
}
30563
30564/// Store 16-bit mask to memory
30565///
30566/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
30567#[inline]
30568#[target_feature(enable = "avx512f")]
30569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30570#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30571pub const unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
30572 *mem_addr = a;
30573}
30574
30575/// Copy 16-bit mask a to k.
30576///
30577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
30578#[inline]
30579#[target_feature(enable = "avx512f")]
30580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30581#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw
30582#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30583pub const fn _mm512_kmov(a: __mmask16) -> __mmask16 {
30584 a
30585}
30586
30587/// Converts integer mask into bitmask, storing the result in dst.
30588///
30589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
30590#[inline]
30591#[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw
30592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30594pub const fn _mm512_int2mask(mask: i32) -> __mmask16 {
30595 mask as u16
30596}
30597
30598/// Converts bit mask k1 into an integer value, storing the results in dst.
30599///
30600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
30601#[inline]
30602#[target_feature(enable = "avx512f")]
30603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30604#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw
30605#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30606pub const fn _mm512_mask2int(k1: __mmask16) -> i32 {
30607 k1 as i32
30608}
30609
30610/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
30611///
30612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
30613#[inline]
30614#[target_feature(enable = "avx512f")]
30615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30616#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckbw
30617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30618pub const fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
30619 ((a & 0xff) << 8) | (b & 0xff)
30620}
30621
30622/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
30623///
30624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
30625#[inline]
30626#[target_feature(enable = "avx512f")]
30627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30628#[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw
30629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30630pub const fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
30631 let r: bool = (a | b) == 0b11111111_11111111;
30632 r as i32
30633}
30634
30635/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
30636///
30637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
30638#[inline]
30639#[target_feature(enable = "avx512f")]
30640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30641#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kortestw
30642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30643pub const fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
30644 let r: bool = (a | b) == 0;
30645 r as i32
30646}
30647
30648/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30649///
30650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
30651#[inline]
30652#[target_feature(enable = "avx512f")]
30653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30654#[cfg_attr(test, assert_instr(vptestmd))]
30655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30656pub const fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30657 let and: __m512i = _mm512_and_epi32(a, b);
30658 let zero: __m512i = _mm512_setzero_si512();
30659 _mm512_cmpneq_epi32_mask(a:and, b:zero)
30660}
30661
30662/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30663///
30664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
30665#[inline]
30666#[target_feature(enable = "avx512f")]
30667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30668#[cfg_attr(test, assert_instr(vptestmd))]
30669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30670pub const fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30671 let and: __m512i = _mm512_and_epi32(a, b);
30672 let zero: __m512i = _mm512_setzero_si512();
30673 _mm512_mask_cmpneq_epi32_mask(k1:k, a:and, b:zero)
30674}
30675
30676/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30677///
30678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
30679#[inline]
30680#[target_feature(enable = "avx512f,avx512vl")]
30681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30682#[cfg_attr(test, assert_instr(vptestmd))]
30683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30684pub const fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30685 let and: __m256i = _mm256_and_si256(a, b);
30686 let zero: __m256i = _mm256_setzero_si256();
30687 _mm256_cmpneq_epi32_mask(a:and, b:zero)
30688}
30689
30690/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30691///
30692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
30693#[inline]
30694#[target_feature(enable = "avx512f,avx512vl")]
30695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30696#[cfg_attr(test, assert_instr(vptestmd))]
30697#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30698pub const fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30699 let and: __m256i = _mm256_and_si256(a, b);
30700 let zero: __m256i = _mm256_setzero_si256();
30701 _mm256_mask_cmpneq_epi32_mask(k1:k, a:and, b:zero)
30702}
30703
30704/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30705///
30706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
30707#[inline]
30708#[target_feature(enable = "avx512f,avx512vl")]
30709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30710#[cfg_attr(test, assert_instr(vptestmd))]
30711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30712pub const fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30713 let and: __m128i = _mm_and_si128(a, b);
30714 let zero: __m128i = _mm_setzero_si128();
30715 _mm_cmpneq_epi32_mask(a:and, b:zero)
30716}
30717
30718/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30719///
30720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
30721#[inline]
30722#[target_feature(enable = "avx512f,avx512vl")]
30723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30724#[cfg_attr(test, assert_instr(vptestmd))]
30725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30726pub const fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30727 let and: __m128i = _mm_and_si128(a, b);
30728 let zero: __m128i = _mm_setzero_si128();
30729 _mm_mask_cmpneq_epi32_mask(k1:k, a:and, b:zero)
30730}
30731
30732/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30733///
30734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
30735#[inline]
30736#[target_feature(enable = "avx512f")]
30737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30738#[cfg_attr(test, assert_instr(vptestmq))]
30739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30740pub const fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30741 let and: __m512i = _mm512_and_epi64(a, b);
30742 let zero: __m512i = _mm512_setzero_si512();
30743 _mm512_cmpneq_epi64_mask(a:and, b:zero)
30744}
30745
30746/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30747///
30748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
30749#[inline]
30750#[target_feature(enable = "avx512f")]
30751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30752#[cfg_attr(test, assert_instr(vptestmq))]
30753#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30754pub const fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30755 let and: __m512i = _mm512_and_epi64(a, b);
30756 let zero: __m512i = _mm512_setzero_si512();
30757 _mm512_mask_cmpneq_epi64_mask(k1:k, a:and, b:zero)
30758}
30759
30760/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30761///
30762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
30763#[inline]
30764#[target_feature(enable = "avx512f,avx512vl")]
30765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30766#[cfg_attr(test, assert_instr(vptestmq))]
30767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30768pub const fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30769 let and: __m256i = _mm256_and_si256(a, b);
30770 let zero: __m256i = _mm256_setzero_si256();
30771 _mm256_cmpneq_epi64_mask(a:and, b:zero)
30772}
30773
30774/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30775///
30776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
30777#[inline]
30778#[target_feature(enable = "avx512f,avx512vl")]
30779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30780#[cfg_attr(test, assert_instr(vptestmq))]
30781#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30782pub const fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30783 let and: __m256i = _mm256_and_si256(a, b);
30784 let zero: __m256i = _mm256_setzero_si256();
30785 _mm256_mask_cmpneq_epi64_mask(k1:k, a:and, b:zero)
30786}
30787
30788/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30789///
30790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
30791#[inline]
30792#[target_feature(enable = "avx512f,avx512vl")]
30793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30794#[cfg_attr(test, assert_instr(vptestmq))]
30795#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30796pub const fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30797 let and: __m128i = _mm_and_si128(a, b);
30798 let zero: __m128i = _mm_setzero_si128();
30799 _mm_cmpneq_epi64_mask(a:and, b:zero)
30800}
30801
30802/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30803///
30804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
30805#[inline]
30806#[target_feature(enable = "avx512f,avx512vl")]
30807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30808#[cfg_attr(test, assert_instr(vptestmq))]
30809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30810pub const fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30811 let and: __m128i = _mm_and_si128(a, b);
30812 let zero: __m128i = _mm_setzero_si128();
30813 _mm_mask_cmpneq_epi64_mask(k1:k, a:and, b:zero)
30814}
30815
30816/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30817///
30818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
30819#[inline]
30820#[target_feature(enable = "avx512f")]
30821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30822#[cfg_attr(test, assert_instr(vptestnmd))]
30823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30824pub const fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30825 let and: __m512i = _mm512_and_epi32(a, b);
30826 let zero: __m512i = _mm512_setzero_si512();
30827 _mm512_cmpeq_epi32_mask(a:and, b:zero)
30828}
30829
30830/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30836#[cfg_attr(test, assert_instr(vptestnmd))]
30837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30838pub const fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30839 let and: __m512i = _mm512_and_epi32(a, b);
30840 let zero: __m512i = _mm512_setzero_si512();
30841 _mm512_mask_cmpeq_epi32_mask(k1:k, a:and, b:zero)
30842}
30843
30844/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
30847#[inline]
30848#[target_feature(enable = "avx512f,avx512vl")]
30849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30850#[cfg_attr(test, assert_instr(vptestnmd))]
30851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30852pub const fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30853 let and: __m256i = _mm256_and_si256(a, b);
30854 let zero: __m256i = _mm256_setzero_si256();
30855 _mm256_cmpeq_epi32_mask(a:and, b:zero)
30856}
30857
30858/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30859///
30860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
30861#[inline]
30862#[target_feature(enable = "avx512f,avx512vl")]
30863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30864#[cfg_attr(test, assert_instr(vptestnmd))]
30865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30866pub const fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30867 let and: __m256i = _mm256_and_si256(a, b);
30868 let zero: __m256i = _mm256_setzero_si256();
30869 _mm256_mask_cmpeq_epi32_mask(k1:k, a:and, b:zero)
30870}
30871
30872/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30873///
30874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
30875#[inline]
30876#[target_feature(enable = "avx512f,avx512vl")]
30877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30878#[cfg_attr(test, assert_instr(vptestnmd))]
30879#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30880pub const fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30881 let and: __m128i = _mm_and_si128(a, b);
30882 let zero: __m128i = _mm_setzero_si128();
30883 _mm_cmpeq_epi32_mask(a:and, b:zero)
30884}
30885
30886/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30887///
30888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
30889#[inline]
30890#[target_feature(enable = "avx512f,avx512vl")]
30891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30892#[cfg_attr(test, assert_instr(vptestnmd))]
30893#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30894pub const fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30895 let and: __m128i = _mm_and_si128(a, b);
30896 let zero: __m128i = _mm_setzero_si128();
30897 _mm_mask_cmpeq_epi32_mask(k1:k, a:and, b:zero)
30898}
30899
30900/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30901///
30902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
30903#[inline]
30904#[target_feature(enable = "avx512f")]
30905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30906#[cfg_attr(test, assert_instr(vptestnmq))]
30907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30908pub const fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30909 let and: __m512i = _mm512_and_epi64(a, b);
30910 let zero: __m512i = _mm512_setzero_si512();
30911 _mm512_cmpeq_epi64_mask(a:and, b:zero)
30912}
30913
30914/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30915///
30916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
30917#[inline]
30918#[target_feature(enable = "avx512f")]
30919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30920#[cfg_attr(test, assert_instr(vptestnmq))]
30921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30922pub const fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30923 let and: __m512i = _mm512_and_epi64(a, b);
30924 let zero: __m512i = _mm512_setzero_si512();
30925 _mm512_mask_cmpeq_epi64_mask(k1:k, a:and, b:zero)
30926}
30927
30928/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30934#[cfg_attr(test, assert_instr(vptestnmq))]
30935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30936pub const fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30937 let and: __m256i = _mm256_and_si256(a, b);
30938 let zero: __m256i = _mm256_setzero_si256();
30939 _mm256_cmpeq_epi64_mask(a:and, b:zero)
30940}
30941
30942/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30943///
30944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
30945#[inline]
30946#[target_feature(enable = "avx512f,avx512vl")]
30947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30948#[cfg_attr(test, assert_instr(vptestnmq))]
30949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30950pub const fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30951 let and: __m256i = _mm256_and_si256(a, b);
30952 let zero: __m256i = _mm256_setzero_si256();
30953 _mm256_mask_cmpeq_epi64_mask(k1:k, a:and, b:zero)
30954}
30955
30956/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30957///
30958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
30959#[inline]
30960#[target_feature(enable = "avx512f,avx512vl")]
30961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30962#[cfg_attr(test, assert_instr(vptestnmq))]
30963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30964pub const fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30965 let and: __m128i = _mm_and_si128(a, b);
30966 let zero: __m128i = _mm_setzero_si128();
30967 _mm_cmpeq_epi64_mask(a:and, b:zero)
30968}
30969
30970/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
30973#[inline]
30974#[target_feature(enable = "avx512f,avx512vl")]
30975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30976#[cfg_attr(test, assert_instr(vptestnmq))]
30977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30978pub const fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30979 let and: __m128i = _mm_and_si128(a, b);
30980 let zero: __m128i = _mm_setzero_si128();
30981 _mm_mask_cmpeq_epi64_mask(k1:k, a:and, b:zero)
30982}
30983
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Emits `vmovntps [mem_addr], zmm` via the vps! memory-operand helper macro,
    // so the non-temporal hint cannot be rewritten away by the optimizer.
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack use; vmovntps does not modify EFLAGS.
        options(nostack, preserves_flags),
    );
}
31010
/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Emits `vmovntpd [mem_addr], zmm` via the vps! memory-operand helper macro,
    // so the non-temporal hint cannot be rewritten away by the optimizer.
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack use; vmovntpd does not modify EFLAGS.
        options(nostack, preserves_flags),
    );
}
31037
/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Emits `vmovntdq [mem_addr], zmm` via the vps! memory-operand helper macro,
    // so the non-temporal hint cannot be rewritten away by the optimizer.
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack use; vmovntdq does not modify EFLAGS.
        options(nostack, preserves_flags),
    );
}
31064
/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
    let dst: __m512i;
    // Emits `vmovntdqa zmm, [mem_addr]` via the vpl! memory-operand helper macro;
    // inline asm guarantees the non-temporal hint is preserved by codegen.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(zmm_reg) dst,
        p = in(reg) mem_addr,
        // pure + readonly: result depends only on the loaded memory; no other
        // side effects, no stack adjustment, EFLAGS untouched.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
31083
31084/// Sets packed 32-bit integers in `dst` with the supplied values.
31085///
31086/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
31087#[inline]
31088#[target_feature(enable = "avx512f")]
31089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31091pub const fn _mm512_set_ps(
31092 e0: f32,
31093 e1: f32,
31094 e2: f32,
31095 e3: f32,
31096 e4: f32,
31097 e5: f32,
31098 e6: f32,
31099 e7: f32,
31100 e8: f32,
31101 e9: f32,
31102 e10: f32,
31103 e11: f32,
31104 e12: f32,
31105 e13: f32,
31106 e14: f32,
31107 e15: f32,
31108) -> __m512 {
31109 _mm512_setr_ps(
31110 e0:e15, e1:e14, e2:e13, e3:e12, e4:e11, e5:e10, e6:e9, e7:e8, e8:e7, e9:e6, e10:e5, e11:e4, e12:e3, e13:e2, e14:e1, e15:e0,
31111 )
31112}
31113
31114/// Sets packed 32-bit integers in `dst` with the supplied values in
31115/// reverse order.
31116///
31117/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
31118#[inline]
31119#[target_feature(enable = "avx512f")]
31120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31122pub const fn _mm512_setr_ps(
31123 e0: f32,
31124 e1: f32,
31125 e2: f32,
31126 e3: f32,
31127 e4: f32,
31128 e5: f32,
31129 e6: f32,
31130 e7: f32,
31131 e8: f32,
31132 e9: f32,
31133 e10: f32,
31134 e11: f32,
31135 e12: f32,
31136 e13: f32,
31137 e14: f32,
31138 e15: f32,
31139) -> __m512 {
31140 unsafe {
31141 let r: Simd = f32x16::new(
31142 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15,
31143 );
31144 transmute(src:r)
31145 }
31146}
31147
31148/// Broadcast 64-bit float `a` to all elements of `dst`.
31149///
31150/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
31151#[inline]
31152#[target_feature(enable = "avx512f")]
31153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31155pub const fn _mm512_set1_pd(a: f64) -> __m512d {
31156 unsafe { transmute(src:f64x8::splat(a)) }
31157}
31158
31159/// Broadcast 32-bit float `a` to all elements of `dst`.
31160///
31161/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
31162#[inline]
31163#[target_feature(enable = "avx512f")]
31164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31165#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31166pub const fn _mm512_set1_ps(a: f32) -> __m512 {
31167 unsafe { transmute(src:f32x16::splat(a)) }
31168}
31169
31170/// Sets packed 32-bit integers in `dst` with the supplied values.
31171///
31172/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
31173#[inline]
31174#[target_feature(enable = "avx512f")]
31175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31177pub const fn _mm512_set_epi32(
31178 e15: i32,
31179 e14: i32,
31180 e13: i32,
31181 e12: i32,
31182 e11: i32,
31183 e10: i32,
31184 e9: i32,
31185 e8: i32,
31186 e7: i32,
31187 e6: i32,
31188 e5: i32,
31189 e4: i32,
31190 e3: i32,
31191 e2: i32,
31192 e1: i32,
31193 e0: i32,
31194) -> __m512i {
31195 _mm512_setr_epi32(
31196 e15:e0, e14:e1, e13:e2, e12:e3, e11:e4, e10:e5, e9:e6, e8:e7, e7:e8, e6:e9, e5:e10, e4:e11, e3:e12, e2:e13, e1:e14, e0:e15,
31197 )
31198}
31199
31200/// Broadcast 8-bit integer a to all elements of dst.
31201///
31202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
31203#[inline]
31204#[target_feature(enable = "avx512f")]
31205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31207pub const fn _mm512_set1_epi8(a: i8) -> __m512i {
31208 unsafe { transmute(src:i8x64::splat(a)) }
31209}
31210
31211/// Broadcast the low packed 16-bit integer from a to all elements of dst.
31212///
31213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
31214#[inline]
31215#[target_feature(enable = "avx512f")]
31216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31217#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31218pub const fn _mm512_set1_epi16(a: i16) -> __m512i {
31219 unsafe { transmute(src:i16x32::splat(a)) }
31220}
31221
31222/// Broadcast 32-bit integer `a` to all elements of `dst`.
31223///
31224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
31225#[inline]
31226#[target_feature(enable = "avx512f")]
31227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31229pub const fn _mm512_set1_epi32(a: i32) -> __m512i {
31230 unsafe { transmute(src:i32x16::splat(a)) }
31231}
31232
31233/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31234///
31235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
31236#[inline]
31237#[target_feature(enable = "avx512f")]
31238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31239#[cfg_attr(test, assert_instr(vpbroadcastd))]
31240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31241pub const fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
31242 unsafe {
31243 let r: Simd = _mm512_set1_epi32(a).as_i32x16();
31244 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
31245 }
31246}
31247
31248/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31249///
31250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
31251#[inline]
31252#[target_feature(enable = "avx512f")]
31253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31254#[cfg_attr(test, assert_instr(vpbroadcastd))]
31255#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31256pub const fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
31257 unsafe {
31258 let r: Simd = _mm512_set1_epi32(a).as_i32x16();
31259 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
31260 }
31261}
31262
31263/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31264///
31265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
31266#[inline]
31267#[target_feature(enable = "avx512f,avx512vl")]
31268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31269#[cfg_attr(test, assert_instr(vpbroadcastd))]
31270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31271pub const fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
31272 unsafe {
31273 let r: Simd = _mm256_set1_epi32(a).as_i32x8();
31274 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
31275 }
31276}
31277
31278/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31284#[cfg_attr(test, assert_instr(vpbroadcastd))]
31285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31286pub const fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
31287 unsafe {
31288 let r: Simd = _mm256_set1_epi32(a).as_i32x8();
31289 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
31290 }
31291}
31292
31293/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31294///
31295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
31296#[inline]
31297#[target_feature(enable = "avx512f,avx512vl")]
31298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31299#[cfg_attr(test, assert_instr(vpbroadcastd))]
31300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31301pub const fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
31302 unsafe {
31303 let r: Simd = _mm_set1_epi32(a).as_i32x4();
31304 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
31305 }
31306}
31307
31308/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31309///
31310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
31311#[inline]
31312#[target_feature(enable = "avx512f,avx512vl")]
31313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31314#[cfg_attr(test, assert_instr(vpbroadcastd))]
31315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31316pub const fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
31317 unsafe {
31318 let r: Simd = _mm_set1_epi32(a).as_i32x4();
31319 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
31320 }
31321}
31322
31323/// Broadcast 64-bit integer `a` to all elements of `dst`.
31324///
31325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
31326#[inline]
31327#[target_feature(enable = "avx512f")]
31328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31330pub const fn _mm512_set1_epi64(a: i64) -> __m512i {
31331 unsafe { transmute(src:i64x8::splat(a)) }
31332}
31333
31334/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31335///
31336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
31337#[inline]
31338#[target_feature(enable = "avx512f")]
31339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31340#[cfg_attr(test, assert_instr(vpbroadcastq))]
31341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31342pub const fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
31343 unsafe {
31344 let r: Simd = _mm512_set1_epi64(a).as_i64x8();
31345 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i64x8()))
31346 }
31347}
31348
31349/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31350///
31351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
31352#[inline]
31353#[target_feature(enable = "avx512f")]
31354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31355#[cfg_attr(test, assert_instr(vpbroadcastq))]
31356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31357pub const fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
31358 unsafe {
31359 let r: Simd = _mm512_set1_epi64(a).as_i64x8();
31360 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x8::ZERO))
31361 }
31362}
31363
31364/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31365///
31366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
31367#[inline]
31368#[target_feature(enable = "avx512f,avx512vl")]
31369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31370#[cfg_attr(test, assert_instr(vpbroadcastq))]
31371#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31372pub const fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
31373 unsafe {
31374 let r: Simd = _mm256_set1_epi64x(a).as_i64x4();
31375 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i64x4()))
31376 }
31377}
31378
31379/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31380///
31381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
31382#[inline]
31383#[target_feature(enable = "avx512f,avx512vl")]
31384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31385#[cfg_attr(test, assert_instr(vpbroadcastq))]
31386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31387pub const fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
31388 unsafe {
31389 let r: Simd = _mm256_set1_epi64x(a).as_i64x4();
31390 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x4::ZERO))
31391 }
31392}
31393
31394/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31395///
31396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
31397#[inline]
31398#[target_feature(enable = "avx512f,avx512vl")]
31399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31400#[cfg_attr(test, assert_instr(vpbroadcastq))]
31401#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31402pub const fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
31403 unsafe {
31404 let r: Simd = _mm_set1_epi64x(a).as_i64x2();
31405 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i64x2()))
31406 }
31407}
31408
31409/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31410///
31411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
31412#[inline]
31413#[target_feature(enable = "avx512f,avx512vl")]
31414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31415#[cfg_attr(test, assert_instr(vpbroadcastq))]
31416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31417pub const fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
31418 unsafe {
31419 let r: Simd = _mm_set1_epi64x(a).as_i64x2();
31420 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x2::ZERO))
31421 }
31422}
31423
31424/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
31425///
31426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
31427#[inline]
31428#[target_feature(enable = "avx512f")]
31429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31430#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31431pub const fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
31432 _mm512_set_epi64(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a)
31433}
31434
31435/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
31436///
31437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
31438#[inline]
31439#[target_feature(enable = "avx512f")]
31440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31442pub const fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
31443 _mm512_set_epi64(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d)
31444}
31445
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_LT_OS` (less-than, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
}
31456
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_LT_OS` (less-than, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
}
31467
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_NLT_US` (not-less-than, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
}
31478
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_NLT_US` (not-less-than, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
}
31489
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_LE_OS` (less-than-or-equal, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
}
31500
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_LE_OS` (less-than-or-equal, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
}
31511
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_NLE_US` (not-less-than-or-equal, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
}
31522
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_NLE_US` (not-less-than-or-equal, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
}
31533
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_EQ_OQ` (equal, ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
}
31544
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_EQ_OQ` (equal, ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
}
31555
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_NEQ_UQ` (not-equal, unordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
}
31566
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_NEQ_UQ` (not-equal, unordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
}
31577
31578/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31579///
31580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
31581#[inline]
31582#[target_feature(enable = "avx512f")]
31583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31584#[rustc_legacy_const_generics(2)]
31585#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31586pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
31587 unsafe {
31588 static_assert_uimm_bits!(IMM8, 5);
31589 let neg_one: i16 = -1;
31590 let a: Simd = a.as_f32x16();
31591 let b: Simd = b.as_f32x16();
31592 let r: i16 = vcmpps(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
31593 r.cast_unsigned()
31594 }
31595}
31596
31597/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31598///
31599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
31600#[inline]
31601#[target_feature(enable = "avx512f")]
31602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31603#[rustc_legacy_const_generics(3)]
31604#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31605pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
31606 unsafe {
31607 static_assert_uimm_bits!(IMM8, 5);
31608 let a: Simd = a.as_f32x16();
31609 let b: Simd = b.as_f32x16();
31610 let r: i16 = vcmpps(a, b, IMM8, m:k1 as i16, _MM_FROUND_CUR_DIRECTION);
31611 r.cast_unsigned()
31612 }
31613}
31614
31615/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31616///
31617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
31618#[inline]
31619#[target_feature(enable = "avx512f,avx512vl")]
31620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31621#[rustc_legacy_const_generics(2)]
31622#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31623pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
31624 unsafe {
31625 static_assert_uimm_bits!(IMM8, 5);
31626 let neg_one: i8 = -1;
31627 let a: Simd = a.as_f32x8();
31628 let b: Simd = b.as_f32x8();
31629 let r: i8 = vcmpps256(a, b, IMM8, m:neg_one);
31630 r.cast_unsigned()
31631 }
31632}
31633
31634/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31635///
31636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
31637#[inline]
31638#[target_feature(enable = "avx512f,avx512vl")]
31639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31640#[rustc_legacy_const_generics(3)]
31641#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31642pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
31643 unsafe {
31644 static_assert_uimm_bits!(IMM8, 5);
31645 let a: Simd = a.as_f32x8();
31646 let b: Simd = b.as_f32x8();
31647 let r: i8 = vcmpps256(a, b, IMM8, m:k1 as i8);
31648 r.cast_unsigned()
31649 }
31650}
31651
31652/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31653///
31654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
31655#[inline]
31656#[target_feature(enable = "avx512f,avx512vl")]
31657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31658#[rustc_legacy_const_generics(2)]
31659#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31660pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
31661 unsafe {
31662 static_assert_uimm_bits!(IMM8, 5);
31663 let neg_one: i8 = -1;
31664 let a: Simd = a.as_f32x4();
31665 let b: Simd = b.as_f32x4();
31666 let r: i8 = vcmpps128(a, b, IMM8, m:neg_one);
31667 r.cast_unsigned()
31668 }
31669}
31670
31671/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31672///
31673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
31674#[inline]
31675#[target_feature(enable = "avx512f,avx512vl")]
31676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31677#[rustc_legacy_const_generics(3)]
31678#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31679pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
31680 unsafe {
31681 static_assert_uimm_bits!(IMM8, 5);
31682 let a: Simd = a.as_f32x4();
31683 let b: Simd = b.as_f32x4();
31684 let r: i8 = vcmpps128(a, b, IMM8, m:k1 as i8);
31685 r.cast_unsigned()
31686 }
31687}
31688
31689/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
31690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
31691///
31692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
31693#[inline]
31694#[target_feature(enable = "avx512f")]
31695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31696#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
31697#[rustc_legacy_const_generics(2, 3)]
31698pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
31699 a: __m512,
31700 b: __m512,
31701) -> __mmask16 {
31702 unsafe {
31703 static_assert_uimm_bits!(IMM5, 5);
31704 static_assert_mantissas_sae!(SAE);
31705 let neg_one: i16 = -1;
31706 let a: Simd = a.as_f32x16();
31707 let b: Simd = b.as_f32x16();
31708 let r: i16 = vcmpps(a, b, IMM5, m:neg_one, SAE);
31709 r.cast_unsigned()
31710 }
31711}
31712
31713/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
31714/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
31715///
31716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
31717#[inline]
31718#[target_feature(enable = "avx512f")]
31719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31720#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
31721#[rustc_legacy_const_generics(3, 4)]
31722pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
31723 m: __mmask16,
31724 a: __m512,
31725 b: __m512,
31726) -> __mmask16 {
31727 unsafe {
31728 static_assert_uimm_bits!(IMM5, 5);
31729 static_assert_mantissas_sae!(SAE);
31730 let a: Simd = a.as_f32x16();
31731 let b: Simd = b.as_f32x16();
31732 let r: i16 = vcmpps(a, b, IMM5, m as i16, SAE);
31733 r.cast_unsigned()
31734 }
31735}
31736
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_ORD_Q` (ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
}
31747
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_ORD_Q` (ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
}
31758
31759/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
31760///
31761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
31762#[inline]
31763#[target_feature(enable = "avx512f")]
31764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31765#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
31766pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
31767 _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
31768}
31769
31770/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31771///
31772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
31773#[inline]
31774#[target_feature(enable = "avx512f")]
31775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31776#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
31777pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
31778 _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
31779}
31780
31781/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
31782///
31783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
31784#[inline]
31785#[target_feature(enable = "avx512f")]
31786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31787#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31788pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31789 _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
31790}
31791
31792/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31793///
31794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
31795#[inline]
31796#[target_feature(enable = "avx512f")]
31797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31798#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31799pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31800 _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
31801}
31802
31803/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
31804///
31805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
31806#[inline]
31807#[target_feature(enable = "avx512f")]
31808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31809#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31810pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31811 _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
31812}
31813
31814/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31815///
31816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
31817#[inline]
31818#[target_feature(enable = "avx512f")]
31819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31820#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31821pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31822 _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(k1:m, a, b)
31823}
31824
31825/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
31826///
31827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
31828#[inline]
31829#[target_feature(enable = "avx512f")]
31830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31831#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31832pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31833 _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
31834}
31835
31836/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31837///
31838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
31839#[inline]
31840#[target_feature(enable = "avx512f")]
31841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31842#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31843pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31844 _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
31845}
31846
31847/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
31848///
31849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
31850#[inline]
31851#[target_feature(enable = "avx512f")]
31852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31853#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31854pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31855 _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
31856}
31857
31858/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31859///
31860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
31861#[inline]
31862#[target_feature(enable = "avx512f")]
31863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31864#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31865pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31866 _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
31867}
31868
31869/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
31870///
31871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
31872#[inline]
31873#[target_feature(enable = "avx512f")]
31874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31875#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31876pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31877 _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
31878}
31879
31880/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31881///
31882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
31883#[inline]
31884#[target_feature(enable = "avx512f")]
31885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31886#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31887pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31888 _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
31889}
31890
31891/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
31892///
31893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
31894#[inline]
31895#[target_feature(enable = "avx512f")]
31896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31897#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31898pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31899 _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
31900}
31901
31902/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31903///
31904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
31905#[inline]
31906#[target_feature(enable = "avx512f")]
31907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31908#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31909pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31910 _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
31911}
31912
31913/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31914///
31915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
31916#[inline]
31917#[target_feature(enable = "avx512f")]
31918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31919#[rustc_legacy_const_generics(2)]
31920#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31921pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
31922 unsafe {
31923 static_assert_uimm_bits!(IMM8, 5);
31924 let neg_one: i8 = -1;
31925 let a: Simd = a.as_f64x8();
31926 let b: Simd = b.as_f64x8();
31927 let r: i8 = vcmppd(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
31928 r.cast_unsigned()
31929 }
31930}
31931
31932/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31933///
31934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
31935#[inline]
31936#[target_feature(enable = "avx512f")]
31937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31938#[rustc_legacy_const_generics(3)]
31939#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31940pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31941 unsafe {
31942 static_assert_uimm_bits!(IMM8, 5);
31943 let a: Simd = a.as_f64x8();
31944 let b: Simd = b.as_f64x8();
31945 let r: i8 = vcmppd(a, b, IMM8, m:k1 as i8, _MM_FROUND_CUR_DIRECTION);
31946 r.cast_unsigned()
31947 }
31948}
31949
31950/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31951///
31952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
31953#[inline]
31954#[target_feature(enable = "avx512f,avx512vl")]
31955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31956#[rustc_legacy_const_generics(2)]
31957#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31958pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
31959 unsafe {
31960 static_assert_uimm_bits!(IMM8, 5);
31961 let neg_one: i8 = -1;
31962 let a: Simd = a.as_f64x4();
31963 let b: Simd = b.as_f64x4();
31964 let r: i8 = vcmppd256(a, b, IMM8, m:neg_one);
31965 r.cast_unsigned()
31966 }
31967}
31968
31969/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31970///
31971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
31972#[inline]
31973#[target_feature(enable = "avx512f,avx512vl")]
31974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31975#[rustc_legacy_const_generics(3)]
31976#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31977pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
31978 unsafe {
31979 static_assert_uimm_bits!(IMM8, 5);
31980 let a: Simd = a.as_f64x4();
31981 let b: Simd = b.as_f64x4();
31982 let r: i8 = vcmppd256(a, b, IMM8, m:k1 as i8);
31983 r.cast_unsigned()
31984 }
31985}
31986
31987/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31988///
31989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
31990#[inline]
31991#[target_feature(enable = "avx512f,avx512vl")]
31992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31993#[rustc_legacy_const_generics(2)]
31994#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31995pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
31996 unsafe {
31997 static_assert_uimm_bits!(IMM8, 5);
31998 let neg_one: i8 = -1;
31999 let a: Simd = a.as_f64x2();
32000 let b: Simd = b.as_f64x2();
32001 let r: i8 = vcmppd128(a, b, IMM8, m:neg_one);
32002 r.cast_unsigned()
32003 }
32004}
32005
32006/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32007///
32008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
32009#[inline]
32010#[target_feature(enable = "avx512f,avx512vl")]
32011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32012#[rustc_legacy_const_generics(3)]
32013#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32014pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32015 unsafe {
32016 static_assert_uimm_bits!(IMM8, 5);
32017 let a: Simd = a.as_f64x2();
32018 let b: Simd = b.as_f64x2();
32019 let r: i8 = vcmppd128(a, b, IMM8, m:k1 as i8);
32020 r.cast_unsigned()
32021 }
32022}
32023
32024/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
32025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32026///
32027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
32028#[inline]
32029#[target_feature(enable = "avx512f")]
32030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32031#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32032#[rustc_legacy_const_generics(2, 3)]
32033pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32034 a: __m512d,
32035 b: __m512d,
32036) -> __mmask8 {
32037 unsafe {
32038 static_assert_uimm_bits!(IMM5, 5);
32039 static_assert_mantissas_sae!(SAE);
32040 let neg_one: i8 = -1;
32041 let a: Simd = a.as_f64x8();
32042 let b: Simd = b.as_f64x8();
32043 let r: i8 = vcmppd(a, b, IMM5, m:neg_one, SAE);
32044 r.cast_unsigned()
32045 }
32046}
32047
32048/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
32049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32050///
32051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
32052#[inline]
32053#[target_feature(enable = "avx512f")]
32054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32055#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32056#[rustc_legacy_const_generics(3, 4)]
32057pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32058 k1: __mmask8,
32059 a: __m512d,
32060 b: __m512d,
32061) -> __mmask8 {
32062 unsafe {
32063 static_assert_uimm_bits!(IMM5, 5);
32064 static_assert_mantissas_sae!(SAE);
32065 let a: Simd = a.as_f64x8();
32066 let b: Simd = b.as_f64x8();
32067 let r: i8 = vcmppd(a, b, IMM5, m:k1 as i8, SAE);
32068 r.cast_unsigned()
32069 }
32070}
32071
32072/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
32073///
32074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
32075#[inline]
32076#[target_feature(enable = "avx512f")]
32077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32078#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32079pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
32080 _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
32081}
32082
32083/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32084///
32085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
32086#[inline]
32087#[target_feature(enable = "avx512f")]
32088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32089#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32090pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
32091 _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
32092}
32093
32094/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
32095///
32096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
32097#[inline]
32098#[target_feature(enable = "avx512f")]
32099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32100#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32101pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
32102 _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
32103}
32104
32105/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32106///
32107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
32108#[inline]
32109#[target_feature(enable = "avx512f")]
32110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32111#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32112pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
32113 _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
32114}
32115
32116/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32122#[rustc_legacy_const_generics(2)]
32123#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32124pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
32125 unsafe {
32126 static_assert_uimm_bits!(IMM8, 5);
32127 let neg_one: i8 = -1;
32128 let r: i8 = vcmpss(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
32129 r.cast_unsigned()
32130 }
32131}
32132
32133/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32134///
32135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
32136#[inline]
32137#[target_feature(enable = "avx512f")]
32138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32139#[rustc_legacy_const_generics(3)]
32140#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32141pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
32142 unsafe {
32143 static_assert_uimm_bits!(IMM8, 5);
32144 let r: i8 = vcmpss(a, b, IMM8, m:k1 as i8, _MM_FROUND_CUR_DIRECTION);
32145 r.cast_unsigned()
32146 }
32147}
32148
32149/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32151///
32152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
32153#[inline]
32154#[target_feature(enable = "avx512f")]
32155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32156#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32157#[rustc_legacy_const_generics(2, 3)]
32158pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
32159 unsafe {
32160 static_assert_uimm_bits!(IMM5, 5);
32161 static_assert_mantissas_sae!(SAE);
32162 let neg_one: i8 = -1;
32163 let r: i8 = vcmpss(a, b, IMM5, m:neg_one, SAE);
32164 r.cast_unsigned()
32165 }
32166}
32167
32168/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not seti).\
32169/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32170///
32171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
32172#[inline]
32173#[target_feature(enable = "avx512f")]
32174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32175#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32176#[rustc_legacy_const_generics(3, 4)]
32177pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
32178 k1: __mmask8,
32179 a: __m128,
32180 b: __m128,
32181) -> __mmask8 {
32182 unsafe {
32183 static_assert_uimm_bits!(IMM5, 5);
32184 static_assert_mantissas_sae!(SAE);
32185 let r: i8 = vcmpss(a, b, IMM5, m:k1 as i8, SAE);
32186 r.cast_unsigned()
32187 }
32188}
32189
32190/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32191///
32192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
32193#[inline]
32194#[target_feature(enable = "avx512f")]
32195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32196#[rustc_legacy_const_generics(2)]
32197#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32198pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32199 unsafe {
32200 static_assert_uimm_bits!(IMM8, 5);
32201 let neg_one: i8 = -1;
32202 let r: i8 = vcmpsd(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
32203 r.cast_unsigned()
32204 }
32205}
32206
32207/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32208///
32209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
32210#[inline]
32211#[target_feature(enable = "avx512f")]
32212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32213#[rustc_legacy_const_generics(3)]
32214#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32215pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32216 unsafe {
32217 static_assert_uimm_bits!(IMM8, 5);
32218 let r: i8 = vcmpsd(a, b, IMM8, m:k1 as i8, _MM_FROUND_CUR_DIRECTION);
32219 r.cast_unsigned()
32220 }
32221}
32222
32223/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32225///
32226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
32227#[inline]
32228#[target_feature(enable = "avx512f")]
32229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32230#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32231#[rustc_legacy_const_generics(2, 3)]
32232pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32233 unsafe {
32234 static_assert_uimm_bits!(IMM5, 5);
32235 static_assert_mantissas_sae!(SAE);
32236 let neg_one: i8 = -1;
32237 let r: i8 = vcmpsd(a, b, IMM5, m:neg_one, SAE);
32238 r.cast_unsigned()
32239 }
32240}
32241
32242/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
32243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32244///
32245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
32246#[inline]
32247#[target_feature(enable = "avx512f")]
32248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32249#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32250#[rustc_legacy_const_generics(3, 4)]
32251pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
32252 k1: __mmask8,
32253 a: __m128d,
32254 b: __m128d,
32255) -> __mmask8 {
32256 unsafe {
32257 static_assert_uimm_bits!(IMM5, 5);
32258 static_assert_mantissas_sae!(SAE);
32259 let r: i8 = vcmpsd(a, b, IMM5, m:k1 as i8, SAE);
32260 r.cast_unsigned()
32261 }
32262}
32263
32264/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32265///
32266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
32267#[inline]
32268#[target_feature(enable = "avx512f")]
32269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32270#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32271#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32272pub const fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32273 unsafe { simd_bitmask::<u32x16, _>(simd_lt(x:a.as_u32x16(), y:b.as_u32x16())) }
32274}
32275
32276/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32277///
32278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
32279#[inline]
32280#[target_feature(enable = "avx512f")]
32281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32282#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32284pub const fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32285 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32286}
32287
32288/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32289///
32290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
32291#[inline]
32292#[target_feature(enable = "avx512f,avx512vl")]
32293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32294#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32296pub const fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32297 unsafe { simd_bitmask::<u32x8, _>(simd_lt(x:a.as_u32x8(), y:b.as_u32x8())) }
32298}
32299
32300/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32301///
32302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
32303#[inline]
32304#[target_feature(enable = "avx512f,avx512vl")]
32305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32306#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32307#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32308pub const fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32309 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32310}
32311
32312/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32313///
32314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
32315#[inline]
32316#[target_feature(enable = "avx512f,avx512vl")]
32317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32318#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32320pub const fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32321 unsafe { simd_bitmask::<u32x4, _>(simd_lt(x:a.as_u32x4(), y:b.as_u32x4())) }
32322}
32323
32324/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32325///
32326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
32327#[inline]
32328#[target_feature(enable = "avx512f,avx512vl")]
32329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32330#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32332pub const fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32333 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32334}
32335
32336/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
32339#[inline]
32340#[target_feature(enable = "avx512f")]
32341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32343#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32344pub const fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32345 unsafe { simd_bitmask::<u32x16, _>(simd_gt(x:a.as_u32x16(), y:b.as_u32x16())) }
32346}
32347
32348/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32349///
32350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
32351#[inline]
32352#[target_feature(enable = "avx512f")]
32353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32354#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32355#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32356pub const fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32357 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32358}
32359
32360/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
32363#[inline]
32364#[target_feature(enable = "avx512f,avx512vl")]
32365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32367#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32368pub const fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32369 unsafe { simd_bitmask::<u32x8, _>(simd_gt(x:a.as_u32x8(), y:b.as_u32x8())) }
32370}
32371
32372/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32373///
32374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
32375#[inline]
32376#[target_feature(enable = "avx512f,avx512vl")]
32377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32378#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32380pub const fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32381 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32382}
32383
32384/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32385///
32386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
32387#[inline]
32388#[target_feature(enable = "avx512f,avx512vl")]
32389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32390#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32392pub const fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32393 unsafe { simd_bitmask::<u32x4, _>(simd_gt(x:a.as_u32x4(), y:b.as_u32x4())) }
32394}
32395
32396/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32397///
32398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
32399#[inline]
32400#[target_feature(enable = "avx512f,avx512vl")]
32401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32402#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32404pub const fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32405 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32406}
32407
32408/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32409///
32410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
32411#[inline]
32412#[target_feature(enable = "avx512f")]
32413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32414#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32416pub const fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32417 unsafe { simd_bitmask::<u32x16, _>(simd_le(x:a.as_u32x16(), y:b.as_u32x16())) }
32418}
32419
32420/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32421///
32422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
32423#[inline]
32424#[target_feature(enable = "avx512f")]
32425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32426#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32428pub const fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32429 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32430}
32431
32432/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32433///
32434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
32435#[inline]
32436#[target_feature(enable = "avx512f,avx512vl")]
32437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32438#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32439#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32440pub const fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32441 unsafe { simd_bitmask::<u32x8, _>(simd_le(x:a.as_u32x8(), y:b.as_u32x8())) }
32442}
32443
32444/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32445///
32446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
32447#[inline]
32448#[target_feature(enable = "avx512f,avx512vl")]
32449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32450#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32452pub const fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32453 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32454}
32455
32456/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32457///
32458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
32459#[inline]
32460#[target_feature(enable = "avx512f,avx512vl")]
32461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32462#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32464pub const fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32465 unsafe { simd_bitmask::<u32x4, _>(simd_le(x:a.as_u32x4(), y:b.as_u32x4())) }
32466}
32467
32468/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32469///
32470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
32471#[inline]
32472#[target_feature(enable = "avx512f,avx512vl")]
32473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32474#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32476pub const fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32477 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32478}
32479
32480/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32481///
32482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
32483#[inline]
32484#[target_feature(enable = "avx512f")]
32485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32486#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32487#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32488pub const fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32489 unsafe { simd_bitmask::<u32x16, _>(simd_ge(x:a.as_u32x16(), y:b.as_u32x16())) }
32490}
32491
32492/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32493///
32494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
32495#[inline]
32496#[target_feature(enable = "avx512f")]
32497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32498#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32499#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32500pub const fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32501 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32502}
32503
32504/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32505///
32506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
32507#[inline]
32508#[target_feature(enable = "avx512f,avx512vl")]
32509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32510#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32512pub const fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32513 unsafe { simd_bitmask::<u32x8, _>(simd_ge(x:a.as_u32x8(), y:b.as_u32x8())) }
32514}
32515
32516/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32517///
32518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
32519#[inline]
32520#[target_feature(enable = "avx512f,avx512vl")]
32521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32522#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32524pub const fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32525 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32526}
32527
32528/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32529///
32530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
32531#[inline]
32532#[target_feature(enable = "avx512f,avx512vl")]
32533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32534#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32536pub const fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32537 unsafe { simd_bitmask::<u32x4, _>(simd_ge(x:a.as_u32x4(), y:b.as_u32x4())) }
32538}
32539
32540/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32541///
32542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
32543#[inline]
32544#[target_feature(enable = "avx512f,avx512vl")]
32545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32546#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32547#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32548pub const fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32549 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32550}
32551
32552/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32553///
32554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
32555#[inline]
32556#[target_feature(enable = "avx512f")]
32557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32558#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32559#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32560pub const fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32561 unsafe { simd_bitmask::<u32x16, _>(simd_eq(x:a.as_u32x16(), y:b.as_u32x16())) }
32562}
32563
32564/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32565///
32566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
32567#[inline]
32568#[target_feature(enable = "avx512f")]
32569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32570#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32572pub const fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32573 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32574}
32575
32576/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32577///
32578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
32579#[inline]
32580#[target_feature(enable = "avx512f,avx512vl")]
32581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32584pub const fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32585 unsafe { simd_bitmask::<u32x8, _>(simd_eq(x:a.as_u32x8(), y:b.as_u32x8())) }
32586}
32587
32588/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32589///
32590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
32591#[inline]
32592#[target_feature(enable = "avx512f,avx512vl")]
32593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32594#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32595#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32596pub const fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32597 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32598}
32599
32600/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32601///
32602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
32603#[inline]
32604#[target_feature(enable = "avx512f,avx512vl")]
32605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32606#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32608pub const fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32609 unsafe { simd_bitmask::<u32x4, _>(simd_eq(x:a.as_u32x4(), y:b.as_u32x4())) }
32610}
32611
32612/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32613///
32614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
32615#[inline]
32616#[target_feature(enable = "avx512f,avx512vl")]
32617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32618#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32620pub const fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32621 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32622}
32623
32624/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32625///
32626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
32627#[inline]
32628#[target_feature(enable = "avx512f")]
32629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32630#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32632pub const fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32633 unsafe { simd_bitmask::<u32x16, _>(simd_ne(x:a.as_u32x16(), y:b.as_u32x16())) }
32634}
32635
32636/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32637///
32638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
32639#[inline]
32640#[target_feature(enable = "avx512f")]
32641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32642#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32643#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32644pub const fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32645 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32646}
32647
32648/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32649///
32650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
32651#[inline]
32652#[target_feature(enable = "avx512f,avx512vl")]
32653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32654#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32656pub const fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32657 unsafe { simd_bitmask::<u32x8, _>(simd_ne(x:a.as_u32x8(), y:b.as_u32x8())) }
32658}
32659
32660/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32661///
32662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
32663#[inline]
32664#[target_feature(enable = "avx512f,avx512vl")]
32665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32666#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32667#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32668pub const fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32669 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32670}
32671
32672/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32673///
32674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
32675#[inline]
32676#[target_feature(enable = "avx512f,avx512vl")]
32677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32678#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32679#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32680pub const fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32681 unsafe { simd_bitmask::<u32x4, _>(simd_ne(x:a.as_u32x4(), y:b.as_u32x4())) }
32682}
32683
32684/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32685///
32686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
32687#[inline]
32688#[target_feature(enable = "avx512f,avx512vl")]
32689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32690#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32692pub const fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32693 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32694}
32695
32696/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32697///
32698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
32699#[inline]
32700#[target_feature(enable = "avx512f")]
32701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32702#[rustc_legacy_const_generics(2)]
32703#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32705pub const fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32706 a: __m512i,
32707 b: __m512i,
32708) -> __mmask16 {
32709 unsafe {
32710 static_assert_uimm_bits!(IMM3, 3);
32711 let a: Simd = a.as_u32x16();
32712 let b: Simd = b.as_u32x16();
32713 let r: Simd = match IMM3 {
32714 0 => simd_eq(x:a, y:b),
32715 1 => simd_lt(x:a, y:b),
32716 2 => simd_le(x:a, y:b),
32717 3 => i32x16::ZERO,
32718 4 => simd_ne(x:a, y:b),
32719 5 => simd_ge(x:a, y:b),
32720 6 => simd_gt(x:a, y:b),
32721 _ => i32x16::splat(-1),
32722 };
32723 simd_bitmask(r)
32724 }
32725}
32726
32727/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32728///
32729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
32730#[inline]
32731#[target_feature(enable = "avx512f")]
32732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32733#[rustc_legacy_const_generics(3)]
32734#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32736pub const fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32737 k1: __mmask16,
32738 a: __m512i,
32739 b: __m512i,
32740) -> __mmask16 {
32741 unsafe {
32742 static_assert_uimm_bits!(IMM3, 3);
32743 let a: Simd = a.as_u32x16();
32744 let b: Simd = b.as_u32x16();
32745 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x16::splat(-1), no:i32x16::ZERO);
32746 let r: Simd = match IMM3 {
32747 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
32748 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
32749 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
32750 3 => i32x16::ZERO,
32751 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
32752 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
32753 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
32754 _ => k1,
32755 };
32756 simd_bitmask(r)
32757 }
32758}
32759
32760/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32761///
32762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
32763#[inline]
32764#[target_feature(enable = "avx512f,avx512vl")]
32765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32766#[rustc_legacy_const_generics(2)]
32767#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32769pub const fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32770 a: __m256i,
32771 b: __m256i,
32772) -> __mmask8 {
32773 unsafe {
32774 static_assert_uimm_bits!(IMM3, 3);
32775 let a: Simd = a.as_u32x8();
32776 let b: Simd = b.as_u32x8();
32777 let r: Simd = match IMM3 {
32778 0 => simd_eq(x:a, y:b),
32779 1 => simd_lt(x:a, y:b),
32780 2 => simd_le(x:a, y:b),
32781 3 => i32x8::ZERO,
32782 4 => simd_ne(x:a, y:b),
32783 5 => simd_ge(x:a, y:b),
32784 6 => simd_gt(x:a, y:b),
32785 _ => i32x8::splat(-1),
32786 };
32787 simd_bitmask(r)
32788 }
32789}
32790
32791/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32792///
32793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
32794#[inline]
32795#[target_feature(enable = "avx512f,avx512vl")]
32796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32797#[rustc_legacy_const_generics(3)]
32798#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32800pub const fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32801 k1: __mmask8,
32802 a: __m256i,
32803 b: __m256i,
32804) -> __mmask8 {
32805 unsafe {
32806 static_assert_uimm_bits!(IMM3, 3);
32807 let a: Simd = a.as_u32x8();
32808 let b: Simd = b.as_u32x8();
32809 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x8::splat(-1), no:i32x8::ZERO);
32810 let r: Simd = match IMM3 {
32811 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
32812 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
32813 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
32814 3 => i32x8::ZERO,
32815 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
32816 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
32817 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
32818 _ => k1,
32819 };
32820 simd_bitmask(r)
32821 }
32822}
32823
32824/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32825///
32826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
32827#[inline]
32828#[target_feature(enable = "avx512f,avx512vl")]
32829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32830#[rustc_legacy_const_generics(2)]
32831#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32833pub const fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32834 unsafe {
32835 static_assert_uimm_bits!(IMM3, 3);
32836 let a: Simd = a.as_u32x4();
32837 let b: Simd = b.as_u32x4();
32838 let r: Simd = match IMM3 {
32839 0 => simd_eq(x:a, y:b),
32840 1 => simd_lt(x:a, y:b),
32841 2 => simd_le(x:a, y:b),
32842 3 => i32x4::ZERO,
32843 4 => simd_ne(x:a, y:b),
32844 5 => simd_ge(x:a, y:b),
32845 6 => simd_gt(x:a, y:b),
32846 _ => i32x4::splat(-1),
32847 };
32848 simd_bitmask(r)
32849 }
32850}
32851
32852/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32853///
32854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
32855#[inline]
32856#[target_feature(enable = "avx512f,avx512vl")]
32857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32858#[rustc_legacy_const_generics(3)]
32859#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32860#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32861pub const fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32862 k1: __mmask8,
32863 a: __m128i,
32864 b: __m128i,
32865) -> __mmask8 {
32866 unsafe {
32867 static_assert_uimm_bits!(IMM3, 3);
32868 let a: Simd = a.as_u32x4();
32869 let b: Simd = b.as_u32x4();
32870 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x4::splat(-1), no:i32x4::ZERO);
32871 let r: Simd = match IMM3 {
32872 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
32873 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
32874 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
32875 3 => i32x4::ZERO,
32876 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
32877 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
32878 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
32879 _ => k1,
32880 };
32881 simd_bitmask(r)
32882 }
32883}
32884
32885/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32886///
32887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
32888#[inline]
32889#[target_feature(enable = "avx512f")]
32890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32891#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32893pub const fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32894 unsafe { simd_bitmask::<i32x16, _>(simd_lt(x:a.as_i32x16(), y:b.as_i32x16())) }
32895}
32896
32897/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32898///
32899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
32900#[inline]
32901#[target_feature(enable = "avx512f")]
32902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32903#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32905pub const fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32906 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32907}
32908
32909/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32917pub const fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32918 unsafe { simd_bitmask::<i32x8, _>(simd_lt(x:a.as_i32x8(), y:b.as_i32x8())) }
32919}
32920
32921/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32922///
32923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
32924#[inline]
32925#[target_feature(enable = "avx512f,avx512vl")]
32926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32927#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32929pub const fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32930 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32931}
32932
32933/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
32936#[inline]
32937#[target_feature(enable = "avx512f,avx512vl")]
32938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32941pub const fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32942 unsafe { simd_bitmask::<i32x4, _>(simd_lt(x:a.as_i32x4(), y:b.as_i32x4())) }
32943}
32944
32945/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32946///
32947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
32948#[inline]
32949#[target_feature(enable = "avx512f,avx512vl")]
32950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32951#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32953pub const fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32954 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32955}
32956
32957/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32958///
32959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
32960#[inline]
32961#[target_feature(enable = "avx512f")]
32962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32963#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32965pub const fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32966 unsafe { simd_bitmask::<i32x16, _>(simd_gt(x:a.as_i32x16(), y:b.as_i32x16())) }
32967}
32968
32969/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32970///
32971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
32972#[inline]
32973#[target_feature(enable = "avx512f")]
32974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32975#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32977pub const fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32978 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32979}
32980
32981/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32982///
32983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
32984#[inline]
32985#[target_feature(enable = "avx512f,avx512vl")]
32986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32989pub const fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32990 unsafe { simd_bitmask::<i32x8, _>(simd_gt(x:a.as_i32x8(), y:b.as_i32x8())) }
32991}
32992
32993/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32994///
32995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
32996#[inline]
32997#[target_feature(enable = "avx512f,avx512vl")]
32998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32999#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33001pub const fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33002 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
33003}
33004
33005/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
33006///
33007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
33008#[inline]
33009#[target_feature(enable = "avx512f,avx512vl")]
33010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33013pub const fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33014 unsafe { simd_bitmask::<i32x4, _>(simd_gt(x:a.as_i32x4(), y:b.as_i32x4())) }
33015}
33016
33017/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33018///
33019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
33020#[inline]
33021#[target_feature(enable = "avx512f,avx512vl")]
33022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33023#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33025pub const fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33026 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
33027}
33028
33029/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33030///
33031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
33032#[inline]
33033#[target_feature(enable = "avx512f")]
33034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33035#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33037pub const fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33038 unsafe { simd_bitmask::<i32x16, _>(simd_le(x:a.as_i32x16(), y:b.as_i32x16())) }
33039}
33040
33041/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33042///
33043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
33044#[inline]
33045#[target_feature(enable = "avx512f")]
33046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33049pub const fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33050 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33051}
33052
33053/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33054///
33055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
33056#[inline]
33057#[target_feature(enable = "avx512f,avx512vl")]
33058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33059#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33061pub const fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33062 unsafe { simd_bitmask::<i32x8, _>(simd_le(x:a.as_i32x8(), y:b.as_i32x8())) }
33063}
33064
33065/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33066///
33067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
33068#[inline]
33069#[target_feature(enable = "avx512f,avx512vl")]
33070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33071#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33073pub const fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33074 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33075}
33076
33077/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33078///
33079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
33080#[inline]
33081#[target_feature(enable = "avx512f,avx512vl")]
33082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33083#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33085pub const fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33086 unsafe { simd_bitmask::<i32x4, _>(simd_le(x:a.as_i32x4(), y:b.as_i32x4())) }
33087}
33088
33089/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33090///
33091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
33092#[inline]
33093#[target_feature(enable = "avx512f,avx512vl")]
33094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33095#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33097pub const fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33098 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33099}
33100
33101/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33102///
33103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
33104#[inline]
33105#[target_feature(enable = "avx512f")]
33106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33107#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33109pub const fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33110 unsafe { simd_bitmask::<i32x16, _>(simd_ge(x:a.as_i32x16(), y:b.as_i32x16())) }
33111}
33112
33113/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33114///
33115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
33116#[inline]
33117#[target_feature(enable = "avx512f")]
33118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33119#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33121pub const fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33122 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33123}
33124
33125/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33126///
33127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
33128#[inline]
33129#[target_feature(enable = "avx512f,avx512vl")]
33130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33131#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33133pub const fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33134 unsafe { simd_bitmask::<i32x8, _>(simd_ge(x:a.as_i32x8(), y:b.as_i32x8())) }
33135}
33136
33137/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33138///
33139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
33140#[inline]
33141#[target_feature(enable = "avx512f,avx512vl")]
33142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33143#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33145pub const fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33146 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33147}
33148
33149/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33150///
33151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
33152#[inline]
33153#[target_feature(enable = "avx512f,avx512vl")]
33154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33157pub const fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33158 unsafe { simd_bitmask::<i32x4, _>(simd_ge(x:a.as_i32x4(), y:b.as_i32x4())) }
33159}
33160
33161/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33162///
33163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
33164#[inline]
33165#[target_feature(enable = "avx512f,avx512vl")]
33166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33167#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33169pub const fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33170 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33171}
33172
33173/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33174///
33175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
33176#[inline]
33177#[target_feature(enable = "avx512f")]
33178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33181pub const fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33182 unsafe { simd_bitmask::<i32x16, _>(simd_eq(x:a.as_i32x16(), y:b.as_i32x16())) }
33183}
33184
33185/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33186///
33187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
33188#[inline]
33189#[target_feature(enable = "avx512f")]
33190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33191#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33193pub const fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33194 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33195}
33196
33197/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33198///
33199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
33200#[inline]
33201#[target_feature(enable = "avx512f,avx512vl")]
33202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33203#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33205pub const fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33206 unsafe { simd_bitmask::<i32x8, _>(simd_eq(x:a.as_i32x8(), y:b.as_i32x8())) }
33207}
33208
33209/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33210///
33211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
33212#[inline]
33213#[target_feature(enable = "avx512f,avx512vl")]
33214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33215#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33217pub const fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33218 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33219}
33220
33221/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33222///
33223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
33224#[inline]
33225#[target_feature(enable = "avx512f,avx512vl")]
33226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33227#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33229pub const fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33230 unsafe { simd_bitmask::<i32x4, _>(simd_eq(x:a.as_i32x4(), y:b.as_i32x4())) }
33231}
33232
33233/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33234///
33235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
33236#[inline]
33237#[target_feature(enable = "avx512f,avx512vl")]
33238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33239#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33241pub const fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33242 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33243}
33244
33245/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33253pub const fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33254 unsafe { simd_bitmask::<i32x16, _>(simd_ne(x:a.as_i32x16(), y:b.as_i32x16())) }
33255}
33256
33257/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33258///
33259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
33260#[inline]
33261#[target_feature(enable = "avx512f")]
33262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33263#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33265pub const fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33266 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33267}
33268
33269/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33270///
33271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
33272#[inline]
33273#[target_feature(enable = "avx512f,avx512vl")]
33274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33275#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33277pub const fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33278 unsafe { simd_bitmask::<i32x8, _>(simd_ne(x:a.as_i32x8(), y:b.as_i32x8())) }
33279}
33280
33281/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33282///
33283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
33284#[inline]
33285#[target_feature(enable = "avx512f,avx512vl")]
33286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33287#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33288#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33289pub const fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33290 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33291}
33292
33293/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33294///
33295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
33296#[inline]
33297#[target_feature(enable = "avx512f,avx512vl")]
33298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33299#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33301pub const fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33302 unsafe { simd_bitmask::<i32x4, _>(simd_ne(x:a.as_i32x4(), y:b.as_i32x4())) }
33303}
33304
33305/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33306///
33307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
33308#[inline]
33309#[target_feature(enable = "avx512f,avx512vl")]
33310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33311#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33312#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33313pub const fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33314 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33315}
33316
33317/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33318///
33319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
33320#[inline]
33321#[target_feature(enable = "avx512f")]
33322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33323#[rustc_legacy_const_generics(2)]
33324#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33325#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33326pub const fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33327 a: __m512i,
33328 b: __m512i,
33329) -> __mmask16 {
33330 unsafe {
33331 static_assert_uimm_bits!(IMM3, 3);
33332 let a: Simd = a.as_i32x16();
33333 let b: Simd = b.as_i32x16();
33334 let r: Simd = match IMM3 {
33335 0 => simd_eq(x:a, y:b),
33336 1 => simd_lt(x:a, y:b),
33337 2 => simd_le(x:a, y:b),
33338 3 => i32x16::ZERO,
33339 4 => simd_ne(x:a, y:b),
33340 5 => simd_ge(x:a, y:b),
33341 6 => simd_gt(x:a, y:b),
33342 _ => i32x16::splat(-1),
33343 };
33344 simd_bitmask(r)
33345 }
33346}
33347
33348/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33349///
33350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
33351#[inline]
33352#[target_feature(enable = "avx512f")]
33353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33354#[rustc_legacy_const_generics(3)]
33355#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33357pub const fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33358 k1: __mmask16,
33359 a: __m512i,
33360 b: __m512i,
33361) -> __mmask16 {
33362 unsafe {
33363 static_assert_uimm_bits!(IMM3, 3);
33364 let a: Simd = a.as_i32x16();
33365 let b: Simd = b.as_i32x16();
33366 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x16::splat(-1), no:i32x16::ZERO);
33367 let r: Simd = match IMM3 {
33368 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33369 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33370 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33371 3 => i32x16::ZERO,
33372 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33373 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33374 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33375 _ => k1,
33376 };
33377 simd_bitmask(r)
33378 }
33379}
33380
33381/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33382///
33383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_cmp_epi32_mask&expand=695)
33384#[inline]
33385#[target_feature(enable = "avx512f,avx512vl")]
33386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33387#[rustc_legacy_const_generics(2)]
33388#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33390pub const fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33391 a: __m256i,
33392 b: __m256i,
33393) -> __mmask8 {
33394 unsafe {
33395 static_assert_uimm_bits!(IMM3, 3);
33396 let a: Simd = a.as_i32x8();
33397 let b: Simd = b.as_i32x8();
33398 let r: Simd = match IMM3 {
33399 0 => simd_eq(x:a, y:b),
33400 1 => simd_lt(x:a, y:b),
33401 2 => simd_le(x:a, y:b),
33402 3 => i32x8::ZERO,
33403 4 => simd_ne(x:a, y:b),
33404 5 => simd_ge(x:a, y:b),
33405 6 => simd_gt(x:a, y:b),
33406 _ => i32x8::splat(-1),
33407 };
33408 simd_bitmask(r)
33409 }
33410}
33411
33412/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33413///
33414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
33415#[inline]
33416#[target_feature(enable = "avx512f,avx512vl")]
33417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33418#[rustc_legacy_const_generics(3)]
33419#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33421pub const fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33422 k1: __mmask8,
33423 a: __m256i,
33424 b: __m256i,
33425) -> __mmask8 {
33426 unsafe {
33427 static_assert_uimm_bits!(IMM3, 3);
33428 let a: Simd = a.as_i32x8();
33429 let b: Simd = b.as_i32x8();
33430 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x8::splat(-1), no:i32x8::ZERO);
33431 let r: Simd = match IMM3 {
33432 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33433 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33434 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33435 3 => i32x8::ZERO,
33436 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33437 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33438 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33439 _ => k1,
33440 };
33441 simd_bitmask(r)
33442 }
33443}
33444
33445/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33446///
33447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
33448#[inline]
33449#[target_feature(enable = "avx512f,avx512vl")]
33450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33451#[rustc_legacy_const_generics(2)]
33452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33453#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33454pub const fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33455 unsafe {
33456 static_assert_uimm_bits!(IMM3, 3);
33457 let a: Simd = a.as_i32x4();
33458 let b: Simd = b.as_i32x4();
33459 let r: Simd = match IMM3 {
33460 0 => simd_eq(x:a, y:b),
33461 1 => simd_lt(x:a, y:b),
33462 2 => simd_le(x:a, y:b),
33463 3 => i32x4::ZERO,
33464 4 => simd_ne(x:a, y:b),
33465 5 => simd_ge(x:a, y:b),
33466 6 => simd_gt(x:a, y:b),
33467 _ => i32x4::splat(-1),
33468 };
33469 simd_bitmask(r)
33470 }
33471}
33472
33473/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33474///
33475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
33476#[inline]
33477#[target_feature(enable = "avx512f,avx512vl")]
33478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33479#[rustc_legacy_const_generics(3)]
33480#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33482pub const fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33483 k1: __mmask8,
33484 a: __m128i,
33485 b: __m128i,
33486) -> __mmask8 {
33487 unsafe {
33488 static_assert_uimm_bits!(IMM3, 3);
33489 let a: Simd = a.as_i32x4();
33490 let b: Simd = b.as_i32x4();
33491 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x4::splat(-1), no:i32x4::ZERO);
33492 let r: Simd = match IMM3 {
33493 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33494 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33495 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33496 3 => i32x4::ZERO,
33497 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33498 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33499 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33500 _ => k1,
33501 };
33502 simd_bitmask(r)
33503 }
33504}
33505
33506/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33507///
33508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
33509#[inline]
33510#[target_feature(enable = "avx512f")]
33511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33512#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33514pub const fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33515 unsafe { simd_bitmask::<__m512i, _>(simd_lt(x:a.as_u64x8(), y:b.as_u64x8())) }
33516}
33517
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LT predicate; `k1`
    // zeroes out result bits whose corresponding mask bit is clear.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33529
33530/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33531///
33532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
33533#[inline]
33534#[target_feature(enable = "avx512f,avx512vl")]
33535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33536#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33537#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33538pub const fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33539 unsafe { simd_bitmask::<__m256i, _>(simd_lt(x:a.as_u64x4(), y:b.as_u64x4())) }
33540}
33541
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LT predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33553
33554/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33555///
33556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
33557#[inline]
33558#[target_feature(enable = "avx512f,avx512vl")]
33559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33562pub const fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33563 unsafe { simd_bitmask::<__m128i, _>(simd_lt(x:a.as_u64x2(), y:b.as_u64x2())) }
33564}
33565
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LT predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33577
33578/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33579///
33580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
33581#[inline]
33582#[target_feature(enable = "avx512f")]
33583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33584#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33586pub const fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33587 unsafe { simd_bitmask::<__m512i, _>(simd_gt(x:a.as_u64x8(), y:b.as_u64x8())) }
33588}
33589
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Greater-than is expressed as NLE (not-less-or-equal) in the predicate encoding.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33601
33602/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33603///
33604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
33605#[inline]
33606#[target_feature(enable = "avx512f,avx512vl")]
33607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33608#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33610pub const fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33611 unsafe { simd_bitmask::<__m256i, _>(simd_gt(x:a.as_u64x4(), y:b.as_u64x4())) }
33612}
33613
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Greater-than is expressed as NLE (not-less-or-equal) in the predicate encoding.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33625
33626/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33627///
33628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
33629#[inline]
33630#[target_feature(enable = "avx512f,avx512vl")]
33631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33632#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33634pub const fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33635 unsafe { simd_bitmask::<__m128i, _>(simd_gt(x:a.as_u64x2(), y:b.as_u64x2())) }
33636}
33637
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than is expressed as NLE (not-less-or-equal) in the predicate encoding.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33649
33650/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33651///
33652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
33653#[inline]
33654#[target_feature(enable = "avx512f")]
33655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33656#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33658pub const fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33659 unsafe { simd_bitmask::<__m512i, _>(simd_le(x:a.as_u64x8(), y:b.as_u64x8())) }
33660}
33661
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LE predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33673
33674/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33675///
33676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
33677#[inline]
33678#[target_feature(enable = "avx512f,avx512vl")]
33679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33680#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33682pub const fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33683 unsafe { simd_bitmask::<__m256i, _>(simd_le(x:a.as_u64x4(), y:b.as_u64x4())) }
33684}
33685
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LE predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33697
33698/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
33701#[inline]
33702#[target_feature(enable = "avx512f,avx512vl")]
33703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33704#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33706pub const fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33707 unsafe { simd_bitmask::<__m128i, _>(simd_le(x:a.as_u64x2(), y:b.as_u64x2())) }
33708}
33709
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LE predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33721
33722/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33723///
33724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
33725#[inline]
33726#[target_feature(enable = "avx512f")]
33727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33730pub const fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33731 unsafe { simd_bitmask::<__m512i, _>(simd_ge(x:a.as_u64x8(), y:b.as_u64x8())) }
33732}
33733
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Greater-or-equal is expressed as NLT (not-less-than) in the predicate encoding.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33745
33746/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33747///
33748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
33749#[inline]
33750#[target_feature(enable = "avx512f,avx512vl")]
33751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33752#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33753#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33754pub const fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33755 unsafe { simd_bitmask::<__m256i, _>(simd_ge(x:a.as_u64x4(), y:b.as_u64x4())) }
33756}
33757
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Greater-or-equal is expressed as NLT (not-less-than) in the predicate encoding.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33769
33770/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33771///
33772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
33773#[inline]
33774#[target_feature(enable = "avx512f,avx512vl")]
33775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33776#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33777#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33778pub const fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33779 unsafe { simd_bitmask::<__m128i, _>(simd_ge(x:a.as_u64x2(), y:b.as_u64x2())) }
33780}
33781
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-or-equal is expressed as NLT (not-less-than) in the predicate encoding.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33793
33794/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33795///
33796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
33797#[inline]
33798#[target_feature(enable = "avx512f")]
33799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33800#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33802pub const fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33803 unsafe { simd_bitmask::<__m512i, _>(simd_eq(x:a.as_u64x8(), y:b.as_u64x8())) }
33804}
33805
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33817
33818/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33819///
33820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
33821#[inline]
33822#[target_feature(enable = "avx512f,avx512vl")]
33823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33824#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33825#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33826pub const fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33827 unsafe { simd_bitmask::<__m256i, _>(simd_eq(x:a.as_u64x4(), y:b.as_u64x4())) }
33828}
33829
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33841
33842/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33843///
33844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
33845#[inline]
33846#[target_feature(enable = "avx512f,avx512vl")]
33847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33848#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33850pub const fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33851 unsafe { simd_bitmask::<__m128i, _>(simd_eq(x:a.as_u64x2(), y:b.as_u64x2())) }
33852}
33853
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33865
33866/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33867///
33868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
33869#[inline]
33870#[target_feature(enable = "avx512f")]
33871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33872#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33873#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33874pub const fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33875 unsafe { simd_bitmask::<__m512i, _>(simd_ne(x:a.as_u64x8(), y:b.as_u64x8())) }
33876}
33877
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33889
33890/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33891///
33892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
33893#[inline]
33894#[target_feature(enable = "avx512f,avx512vl")]
33895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33896#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33897#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33898pub const fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33899 unsafe { simd_bitmask::<__m256i, _>(simd_ne(x:a.as_u64x4(), y:b.as_u64x4())) }
33900}
33901
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33913
33914/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33915///
33916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
33917#[inline]
33918#[target_feature(enable = "avx512f,avx512vl")]
33919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33920#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33922pub const fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33923 unsafe { simd_bitmask::<__m128i, _>(simd_ne(x:a.as_u64x2(), y:b.as_u64x2())) }
33924}
33925
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33937
33938/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33939///
33940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
33941#[inline]
33942#[target_feature(enable = "avx512f")]
33943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33944#[rustc_legacy_const_generics(2)]
33945#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33947pub const fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
33948 a: __m512i,
33949 b: __m512i,
33950) -> __mmask8 {
33951 unsafe {
33952 static_assert_uimm_bits!(IMM3, 3);
33953 let a: Simd = a.as_u64x8();
33954 let b: Simd = b.as_u64x8();
33955 let r: Simd = match IMM3 {
33956 0 => simd_eq(x:a, y:b),
33957 1 => simd_lt(x:a, y:b),
33958 2 => simd_le(x:a, y:b),
33959 3 => i64x8::ZERO,
33960 4 => simd_ne(x:a, y:b),
33961 5 => simd_ge(x:a, y:b),
33962 6 => simd_gt(x:a, y:b),
33963 _ => i64x8::splat(-1),
33964 };
33965 simd_bitmask(r)
33966 }
33967}
33968
33969/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33970///
33971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
33972#[inline]
33973#[target_feature(enable = "avx512f")]
33974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33975#[rustc_legacy_const_generics(3)]
33976#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33978pub const fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
33979 k1: __mmask8,
33980 a: __m512i,
33981 b: __m512i,
33982) -> __mmask8 {
33983 unsafe {
33984 static_assert_uimm_bits!(IMM3, 3);
33985 let a: Simd = a.as_u64x8();
33986 let b: Simd = b.as_u64x8();
33987 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x8::splat(-1), no:i64x8::ZERO);
33988 let r: Simd = match IMM3 {
33989 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33990 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33991 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33992 3 => i64x8::ZERO,
33993 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33994 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33995 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33996 _ => k1,
33997 };
33998 simd_bitmask(r)
33999 }
34000}
34001
34002/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34003///
34004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
34005#[inline]
34006#[target_feature(enable = "avx512f,avx512vl")]
34007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34008#[rustc_legacy_const_generics(2)]
34009#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34011pub const fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34012 a: __m256i,
34013 b: __m256i,
34014) -> __mmask8 {
34015 unsafe {
34016 static_assert_uimm_bits!(IMM3, 3);
34017 let a: Simd = a.as_u64x4();
34018 let b: Simd = b.as_u64x4();
34019 let r: Simd = match IMM3 {
34020 0 => simd_eq(x:a, y:b),
34021 1 => simd_lt(x:a, y:b),
34022 2 => simd_le(x:a, y:b),
34023 3 => i64x4::ZERO,
34024 4 => simd_ne(x:a, y:b),
34025 5 => simd_ge(x:a, y:b),
34026 6 => simd_gt(x:a, y:b),
34027 _ => i64x4::splat(-1),
34028 };
34029 simd_bitmask(r)
34030 }
34031}
34032
34033/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34034///
34035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
34036#[inline]
34037#[target_feature(enable = "avx512f,avx512vl")]
34038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34039#[rustc_legacy_const_generics(3)]
34040#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34042pub const fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34043 k1: __mmask8,
34044 a: __m256i,
34045 b: __m256i,
34046) -> __mmask8 {
34047 unsafe {
34048 static_assert_uimm_bits!(IMM3, 3);
34049 let a: Simd = a.as_u64x4();
34050 let b: Simd = b.as_u64x4();
34051 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x4::splat(-1), no:i64x4::ZERO);
34052 let r: Simd = match IMM3 {
34053 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34054 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34055 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34056 3 => i64x4::ZERO,
34057 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34058 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34059 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34060 _ => k1,
34061 };
34062 simd_bitmask(r)
34063 }
34064}
34065
34066/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34067///
34068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
34069#[inline]
34070#[target_feature(enable = "avx512f,avx512vl")]
34071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34072#[rustc_legacy_const_generics(2)]
34073#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34075pub const fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
34076 unsafe {
34077 static_assert_uimm_bits!(IMM3, 3);
34078 let a: Simd = a.as_u64x2();
34079 let b: Simd = b.as_u64x2();
34080 let r: Simd = match IMM3 {
34081 0 => simd_eq(x:a, y:b),
34082 1 => simd_lt(x:a, y:b),
34083 2 => simd_le(x:a, y:b),
34084 3 => i64x2::ZERO,
34085 4 => simd_ne(x:a, y:b),
34086 5 => simd_ge(x:a, y:b),
34087 6 => simd_gt(x:a, y:b),
34088 _ => i64x2::splat(-1),
34089 };
34090 simd_bitmask(r)
34091 }
34092}
34093
34094/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34095///
34096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
34097#[inline]
34098#[target_feature(enable = "avx512f,avx512vl")]
34099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34100#[rustc_legacy_const_generics(3)]
34101#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34103pub const fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34104 k1: __mmask8,
34105 a: __m128i,
34106 b: __m128i,
34107) -> __mmask8 {
34108 unsafe {
34109 static_assert_uimm_bits!(IMM3, 3);
34110 let a: Simd = a.as_u64x2();
34111 let b: Simd = b.as_u64x2();
34112 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x2::splat(-1), no:i64x2::ZERO);
34113 let r: Simd = match IMM3 {
34114 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34115 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34116 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34117 3 => i64x2::ZERO,
34118 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34119 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34120 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34121 _ => k1,
34122 };
34123 simd_bitmask(r)
34124 }
34125}
34126
34127/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34128///
34129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
34130#[inline]
34131#[target_feature(enable = "avx512f")]
34132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34133#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34134#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34135pub const fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34136 unsafe { simd_bitmask::<__m512i, _>(simd_lt(x:a.as_i64x8(), y:b.as_i64x8())) }
34137}
34138
34139/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34140///
34141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
34142#[inline]
34143#[target_feature(enable = "avx512f")]
34144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34145#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34147pub const fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34148 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34149}
34150
34151/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34152///
34153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
34154#[inline]
34155#[target_feature(enable = "avx512f,avx512vl")]
34156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34159pub const fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34160 unsafe { simd_bitmask::<__m256i, _>(simd_lt(x:a.as_i64x4(), y:b.as_i64x4())) }
34161}
34162
34163/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34164///
34165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
34166#[inline]
34167#[target_feature(enable = "avx512f,avx512vl")]
34168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34169#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34170#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34171pub const fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34172 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34173}
34174
34175/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34176///
34177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
34178#[inline]
34179#[target_feature(enable = "avx512f,avx512vl")]
34180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34181#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34183pub const fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34184 unsafe { simd_bitmask::<__m128i, _>(simd_lt(x:a.as_i64x2(), y:b.as_i64x2())) }
34185}
34186
34187/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34188///
34189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
34190#[inline]
34191#[target_feature(enable = "avx512f,avx512vl")]
34192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34193#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34194#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34195pub const fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34196 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34197}
34198
34199/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34200///
34201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
34202#[inline]
34203#[target_feature(enable = "avx512f")]
34204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34205#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34207pub const fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34208 unsafe { simd_bitmask::<__m512i, _>(simd_gt(x:a.as_i64x8(), y:b.as_i64x8())) }
34209}
34210
34211/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34212///
34213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
34214#[inline]
34215#[target_feature(enable = "avx512f")]
34216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34217#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34218#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34219pub const fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34220 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34221}
34222
34223/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34224///
34225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
34226#[inline]
34227#[target_feature(enable = "avx512f,avx512vl")]
34228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34229#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34230#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34231pub const fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34232 unsafe { simd_bitmask::<__m256i, _>(simd_gt(x:a.as_i64x4(), y:b.as_i64x4())) }
34233}
34234
34235/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34236///
34237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
34238#[inline]
34239#[target_feature(enable = "avx512f,avx512vl")]
34240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34241#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34242#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34243pub const fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34244 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34245}
34246
34247/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34248///
34249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
34250#[inline]
34251#[target_feature(enable = "avx512f,avx512vl")]
34252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34253#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34254#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34255pub const fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34256 unsafe { simd_bitmask::<__m128i, _>(simd_gt(x:a.as_i64x2(), y:b.as_i64x2())) }
34257}
34258
34259/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34260///
34261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
34262#[inline]
34263#[target_feature(enable = "avx512f,avx512vl")]
34264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34265#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34266#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34267pub const fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34268 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34269}
34270
34271/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34272///
34273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
34274#[inline]
34275#[target_feature(enable = "avx512f")]
34276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34277#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34278#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34279pub const fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34280 unsafe { simd_bitmask::<__m512i, _>(simd_le(x:a.as_i64x8(), y:b.as_i64x8())) }
34281}
34282
34283/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34284///
34285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
34286#[inline]
34287#[target_feature(enable = "avx512f")]
34288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34289#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34291pub const fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34292 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34293}
34294
34295/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34296///
34297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
34298#[inline]
34299#[target_feature(enable = "avx512f,avx512vl")]
34300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34301#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34303pub const fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34304 unsafe { simd_bitmask::<__m256i, _>(simd_le(x:a.as_i64x4(), y:b.as_i64x4())) }
34305}
34306
34307/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34308///
34309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
34310#[inline]
34311#[target_feature(enable = "avx512f,avx512vl")]
34312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34313#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34315pub const fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34316 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34317}
34318
34319/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34320///
34321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
34322#[inline]
34323#[target_feature(enable = "avx512f,avx512vl")]
34324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34325#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34327pub const fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34328 unsafe { simd_bitmask::<__m128i, _>(simd_le(x:a.as_i64x2(), y:b.as_i64x2())) }
34329}
34330
34331/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34332///
34333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
34334#[inline]
34335#[target_feature(enable = "avx512f,avx512vl")]
34336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34337#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34339pub const fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34340 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34341}
34342
34343/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34344///
34345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
34346#[inline]
34347#[target_feature(enable = "avx512f")]
34348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34349#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34350#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34351pub const fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34352 unsafe { simd_bitmask::<__m512i, _>(simd_ge(x:a.as_i64x8(), y:b.as_i64x8())) }
34353}
34354
34355/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34356///
34357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
34358#[inline]
34359#[target_feature(enable = "avx512f")]
34360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34361#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34362#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34363pub const fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34364 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34365}
34366
34367/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34368///
34369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
34370#[inline]
34371#[target_feature(enable = "avx512f,avx512vl")]
34372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34373#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34375pub const fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34376 unsafe { simd_bitmask::<__m256i, _>(simd_ge(x:a.as_i64x4(), y:b.as_i64x4())) }
34377}
34378
34379/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34380///
34381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
34382#[inline]
34383#[target_feature(enable = "avx512f,avx512vl")]
34384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34385#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34387pub const fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34388 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34389}
34390
34391/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34392///
34393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
34394#[inline]
34395#[target_feature(enable = "avx512f,avx512vl")]
34396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34397#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34399pub const fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34400 unsafe { simd_bitmask::<__m128i, _>(simd_ge(x:a.as_i64x2(), y:b.as_i64x2())) }
34401}
34402
34403/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34404///
34405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
34406#[inline]
34407#[target_feature(enable = "avx512f,avx512vl")]
34408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34409#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34410#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34411pub const fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34412 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34413}
34414
34415/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34416///
34417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
34418#[inline]
34419#[target_feature(enable = "avx512f")]
34420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34421#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34423pub const fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34424 unsafe { simd_bitmask::<__m512i, _>(simd_eq(x:a.as_i64x8(), y:b.as_i64x8())) }
34425}
34426
34427/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34428///
34429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
34430#[inline]
34431#[target_feature(enable = "avx512f")]
34432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34433#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34434#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34435pub const fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34436 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34437}
34438
34439/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34440///
34441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
34442#[inline]
34443#[target_feature(enable = "avx512f,avx512vl")]
34444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34445#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34447pub const fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34448 unsafe { simd_bitmask::<__m256i, _>(simd_eq(x:a.as_i64x4(), y:b.as_i64x4())) }
34449}
34450
34451/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
34454#[inline]
34455#[target_feature(enable = "avx512f,avx512vl")]
34456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34457#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34458#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34459pub const fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34460 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34461}
34462
34463/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34464///
34465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
34466#[inline]
34467#[target_feature(enable = "avx512f,avx512vl")]
34468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34469#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34471pub const fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34472 unsafe { simd_bitmask::<__m128i, _>(simd_eq(x:a.as_i64x2(), y:b.as_i64x2())) }
34473}
34474
34475/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34476///
34477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
34478#[inline]
34479#[target_feature(enable = "avx512f,avx512vl")]
34480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34481#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34483pub const fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34484 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34485}
34486
34487/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34488///
34489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
34490#[inline]
34491#[target_feature(enable = "avx512f")]
34492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34493#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34494#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34495pub const fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34496 unsafe { simd_bitmask::<__m512i, _>(simd_ne(x:a.as_i64x8(), y:b.as_i64x8())) }
34497}
34498
34499/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34500///
34501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
34502#[inline]
34503#[target_feature(enable = "avx512f")]
34504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34507pub const fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34508 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
34509}
34510
34511/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34512///
34513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
34514#[inline]
34515#[target_feature(enable = "avx512f,avx512vl")]
34516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34517#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34519pub const fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34520 unsafe { simd_bitmask::<__m256i, _>(simd_ne(x:a.as_i64x4(), y:b.as_i64x4())) }
34521}
34522
34523/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34524///
34525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
34526#[inline]
34527#[target_feature(enable = "avx512f,avx512vl")]
34528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34529#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34530#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34531pub const fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34532 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
34533}
34534
34535/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34536///
34537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
34538#[inline]
34539#[target_feature(enable = "avx512f,avx512vl")]
34540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34541#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34542#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34543pub const fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34544 unsafe { simd_bitmask::<__m128i, _>(simd_ne(x:a.as_i64x2(), y:b.as_i64x2())) }
34545}
34546
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the not-equal predicate.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34558
34559/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34560///
34561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
34562#[inline]
34563#[target_feature(enable = "avx512f")]
34564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34565#[rustc_legacy_const_generics(2)]
34566#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34568pub const fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34569 a: __m512i,
34570 b: __m512i,
34571) -> __mmask8 {
34572 unsafe {
34573 static_assert_uimm_bits!(IMM3, 3);
34574 let a: Simd = a.as_i64x8();
34575 let b: Simd = b.as_i64x8();
34576 let r: Simd = match IMM3 {
34577 0 => simd_eq(x:a, y:b),
34578 1 => simd_lt(x:a, y:b),
34579 2 => simd_le(x:a, y:b),
34580 3 => i64x8::ZERO,
34581 4 => simd_ne(x:a, y:b),
34582 5 => simd_ge(x:a, y:b),
34583 6 => simd_gt(x:a, y:b),
34584 _ => i64x8::splat(-1),
34585 };
34586 simd_bitmask(r)
34587 }
34588}
34589
34590/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34591///
34592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
34593#[inline]
34594#[target_feature(enable = "avx512f")]
34595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34596#[rustc_legacy_const_generics(3)]
34597#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34599pub const fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34600 k1: __mmask8,
34601 a: __m512i,
34602 b: __m512i,
34603) -> __mmask8 {
34604 unsafe {
34605 static_assert_uimm_bits!(IMM3, 3);
34606 let a: Simd = a.as_i64x8();
34607 let b: Simd = b.as_i64x8();
34608 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x8::splat(-1), no:i64x8::ZERO);
34609 let r: Simd = match IMM3 {
34610 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34611 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34612 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34613 3 => i64x8::ZERO,
34614 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34615 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34616 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34617 _ => k1,
34618 };
34619 simd_bitmask(r)
34620 }
34621}
34622
34623/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34624///
34625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
34626#[inline]
34627#[target_feature(enable = "avx512f,avx512vl")]
34628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34629#[rustc_legacy_const_generics(2)]
34630#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34632pub const fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34633 a: __m256i,
34634 b: __m256i,
34635) -> __mmask8 {
34636 unsafe {
34637 static_assert_uimm_bits!(IMM3, 3);
34638 let a: Simd = a.as_i64x4();
34639 let b: Simd = b.as_i64x4();
34640 let r: Simd = match IMM3 {
34641 0 => simd_eq(x:a, y:b),
34642 1 => simd_lt(x:a, y:b),
34643 2 => simd_le(x:a, y:b),
34644 3 => i64x4::ZERO,
34645 4 => simd_ne(x:a, y:b),
34646 5 => simd_ge(x:a, y:b),
34647 6 => simd_gt(x:a, y:b),
34648 _ => i64x4::splat(-1),
34649 };
34650 simd_bitmask(r)
34651 }
34652}
34653
34654/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34655///
34656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
34657#[inline]
34658#[target_feature(enable = "avx512f,avx512vl")]
34659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34660#[rustc_legacy_const_generics(3)]
34661#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34663pub const fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34664 k1: __mmask8,
34665 a: __m256i,
34666 b: __m256i,
34667) -> __mmask8 {
34668 unsafe {
34669 static_assert_uimm_bits!(IMM3, 3);
34670 let a: Simd = a.as_i64x4();
34671 let b: Simd = b.as_i64x4();
34672 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x4::splat(-1), no:i64x4::ZERO);
34673 let r: Simd = match IMM3 {
34674 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34675 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34676 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34677 3 => i64x4::ZERO,
34678 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34679 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34680 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34681 _ => k1,
34682 };
34683 simd_bitmask(r)
34684 }
34685}
34686
34687/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34688///
34689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
34690#[inline]
34691#[target_feature(enable = "avx512f,avx512vl")]
34692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34693#[rustc_legacy_const_generics(2)]
34694#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34696pub const fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
34697 unsafe {
34698 static_assert_uimm_bits!(IMM3, 3);
34699 let a: Simd = a.as_i64x2();
34700 let b: Simd = b.as_i64x2();
34701 let r: Simd = match IMM3 {
34702 0 => simd_eq(x:a, y:b),
34703 1 => simd_lt(x:a, y:b),
34704 2 => simd_le(x:a, y:b),
34705 3 => i64x2::ZERO,
34706 4 => simd_ne(x:a, y:b),
34707 5 => simd_ge(x:a, y:b),
34708 6 => simd_gt(x:a, y:b),
34709 _ => i64x2::splat(-1),
34710 };
34711 simd_bitmask(r)
34712 }
34713}
34714
34715/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34716///
34717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
34718#[inline]
34719#[target_feature(enable = "avx512f,avx512vl")]
34720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34721#[rustc_legacy_const_generics(3)]
34722#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34724pub const fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34725 k1: __mmask8,
34726 a: __m128i,
34727 b: __m128i,
34728) -> __mmask8 {
34729 unsafe {
34730 static_assert_uimm_bits!(IMM3, 3);
34731 let a: Simd = a.as_i64x2();
34732 let b: Simd = b.as_i64x2();
34733 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x2::splat(-1), no:i64x2::ZERO);
34734 let r: Simd = match IMM3 {
34735 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34736 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34737 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34738 3 => i64x2::ZERO,
34739 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34740 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34741 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34742 _ => k1,
34743 };
34744 simd_bitmask(r)
34745 }
34746}
34747
34748/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
34749///
34750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
34751#[inline]
34752#[target_feature(enable = "avx512f")]
34753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34754#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34755pub const fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
34756 unsafe { simd_reduce_add_ordered(x:a.as_i32x16(), y:0) }
34757}
34758
34759/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
34760///
34761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
34762#[inline]
34763#[target_feature(enable = "avx512f")]
34764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34766pub const fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
34767 unsafe { simd_reduce_add_ordered(x:simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), y:0) }
34768}
34769
34770/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
34771///
34772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
34773#[inline]
34774#[target_feature(enable = "avx512f")]
34775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34777pub const fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
34778 unsafe { simd_reduce_add_ordered(x:a.as_i64x8(), y:0) }
34779}
34780
34781/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
34782///
34783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
34784#[inline]
34785#[target_feature(enable = "avx512f")]
34786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34788pub const fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
34789 unsafe { simd_reduce_add_ordered(x:simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), y:0) }
34790}
34791
34792/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34798#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34799pub const fn _mm512_reduce_add_ps(a: __m512) -> f32 {
34800 unsafe {
34801 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
34802 let a: __m256 = _mm256_add_ps(
34803 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
34804 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
34805 );
34806 let a: __m128 = _mm_add_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
34807 let a: __m128 = _mm_add_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
34808 simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32)
34809 }
34810}
34811
34812/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
34813///
34814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
34815#[inline]
34816#[target_feature(enable = "avx512f")]
34817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34819pub const fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
34820 unsafe { _mm512_reduce_add_ps(simd_select_bitmask(m:k, yes:a, no:_mm512_setzero_ps())) }
34821}
34822
34823/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
34824///
34825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
34826#[inline]
34827#[target_feature(enable = "avx512f")]
34828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34830pub const fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
34831 unsafe {
34832 let a: __m256d = _mm256_add_pd(
34833 a:_mm512_extractf64x4_pd::<0>(a),
34834 b:_mm512_extractf64x4_pd::<1>(a),
34835 );
34836 let a: __m128d = _mm_add_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
34837 simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64)
34838 }
34839}
34840
34841/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
34842///
34843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
34844#[inline]
34845#[target_feature(enable = "avx512f")]
34846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34847#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34848pub const fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
34849 unsafe { _mm512_reduce_add_pd(simd_select_bitmask(m:k, yes:a, no:_mm512_setzero_pd())) }
34850}
34851
34852/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
34853///
34854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
34855#[inline]
34856#[target_feature(enable = "avx512f")]
34857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34859pub const fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
34860 unsafe { simd_reduce_mul_ordered(x:a.as_i32x16(), y:1) }
34861}
34862
34863/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
34864///
34865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
34866#[inline]
34867#[target_feature(enable = "avx512f")]
34868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34870pub const fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
34871 unsafe {
34872 simd_reduce_mul_ordered(
34873 x:simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
34874 y:1,
34875 )
34876 }
34877}
34878
34879/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
34880///
34881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
34882#[inline]
34883#[target_feature(enable = "avx512f")]
34884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34886pub const fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
34887 unsafe { simd_reduce_mul_ordered(x:a.as_i64x8(), y:1) }
34888}
34889
34890/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
34891///
34892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
34893#[inline]
34894#[target_feature(enable = "avx512f")]
34895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34896#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34897pub const fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
34898 unsafe {
34899 simd_reduce_mul_ordered(
34900 x:simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
34901 y:1,
34902 )
34903 }
34904}
34905
34906/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
34907///
34908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
34909#[inline]
34910#[target_feature(enable = "avx512f")]
34911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34913pub const fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
34914 unsafe {
34915 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
34916 let a: __m256 = _mm256_mul_ps(
34917 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
34918 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
34919 );
34920 let a: __m128 = _mm_mul_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
34921 let a: __m128 = _mm_mul_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
34922 simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32)
34923 }
34924}
34925
34926/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
34927///
34928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
34929#[inline]
34930#[target_feature(enable = "avx512f")]
34931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34933pub const fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
34934 unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(m:k, yes:a, no:_mm512_set1_ps(1.))) }
34935}
34936
34937/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
34938///
34939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
34940#[inline]
34941#[target_feature(enable = "avx512f")]
34942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34944pub const fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
34945 unsafe {
34946 let a: __m256d = _mm256_mul_pd(
34947 a:_mm512_extractf64x4_pd::<0>(a),
34948 b:_mm512_extractf64x4_pd::<1>(a),
34949 );
34950 let a: __m128d = _mm_mul_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
34951 simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64)
34952 }
34953}
34954
34955/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
34956///
34957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
34958#[inline]
34959#[target_feature(enable = "avx512f")]
34960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34962pub const fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
34963 unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(m:k, yes:a, no:_mm512_set1_pd(1.))) }
34964}
34965
/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
    // Horizontal signed max over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_i32x16()) }
}
34976
34977/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
34978///
34979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
34980#[inline]
34981#[target_feature(enable = "avx512f")]
34982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34983#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34984pub const fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
34985 unsafe {
34986 simd_reduce_max(simd_select_bitmask(
34987 m:k,
34988 yes:a.as_i32x16(),
34989 no:i32x16::splat(i32::MIN),
34990 ))
34991 }
34992}
34993
/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
    // Horizontal signed max over all 8 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_i64x8()) }
}
35004
35005/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
35006///
35007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
35008#[inline]
35009#[target_feature(enable = "avx512f")]
35010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35012pub const fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
35013 unsafe { simd_reduce_max(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::splat(i64::MIN))) }
35014}
35015
/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
    // Horizontal unsigned max over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_u32x16()) }
}
35026
35027/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
35028///
35029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
35030#[inline]
35031#[target_feature(enable = "avx512f")]
35032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35033#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35034pub const fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
35035 unsafe { simd_reduce_max(simd_select_bitmask(m:k, yes:a.as_u32x16(), no:u32x16::ZERO)) }
35036}
35037
/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
    // Horizontal unsigned max over all 8 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_u64x8()) }
}
35048
35049/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
35050///
35051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
35052#[inline]
35053#[target_feature(enable = "avx512f")]
35054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35055#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35056pub const fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
35057 unsafe { simd_reduce_max(simd_select_bitmask(m:k, yes:a.as_u64x8(), no:u64x8::ZERO)) }
35058}
35059
35060/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
35061///
35062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
35063#[inline]
35064#[target_feature(enable = "avx512f")]
35065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35066pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
35067 unsafe {
35068 let a: __m256 = _mm256_max_ps(
35069 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
35070 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
35071 );
35072 let a: __m128 = _mm_max_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
35073 let a: __m128 = _mm_max_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
35074 _mm_cvtss_f32(_mm_max_ss(a, b:_mm_movehdup_ps(a)))
35075 }
35076}
35077
35078/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
35079///
35080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
35081#[inline]
35082#[target_feature(enable = "avx512f")]
35083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35084pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
35085 _mm512_reduce_max_ps(_mm512_mask_mov_ps(src:_mm512_set1_ps(f32::MIN), k, a))
35086}
35087
35088/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
35089///
35090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
35091#[inline]
35092#[target_feature(enable = "avx512f")]
35093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35094pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
35095 unsafe {
35096 let a: __m256d = _mm256_max_pd(
35097 a:_mm512_extractf64x4_pd::<0>(a),
35098 b:_mm512_extractf64x4_pd::<1>(a),
35099 );
35100 let a: __m128d = _mm_max_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
35101 _mm_cvtsd_f64(_mm_max_sd(a, b:simd_shuffle!(a, a, [1, 0])))
35102 }
35103}
35104
35105/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
35106///
35107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
35108#[inline]
35109#[target_feature(enable = "avx512f")]
35110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35111pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
35112 _mm512_reduce_max_pd(_mm512_mask_mov_pd(src:_mm512_set1_pd(f64::MIN), k, a))
35113}
35114
/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
    // Horizontal signed min over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_min(a.as_i32x16()) }
}
35125
35126/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35127///
35128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
35129#[inline]
35130#[target_feature(enable = "avx512f")]
35131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35133pub const fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
35134 unsafe {
35135 simd_reduce_min(simd_select_bitmask(
35136 m:k,
35137 yes:a.as_i32x16(),
35138 no:i32x16::splat(i32::MAX),
35139 ))
35140 }
35141}
35142
/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
    // Horizontal signed min over all 8 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_min(a.as_i64x8()) }
}
35153
35154/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35155///
35156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
35157#[inline]
35158#[target_feature(enable = "avx512f")]
35159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35160#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35161pub const fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
35162 unsafe { simd_reduce_min(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::splat(i64::MAX))) }
35163}
35164
/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
    // Horizontal unsigned min over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_min(a.as_u32x16()) }
}
35175
35176/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35177///
35178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
35179#[inline]
35180#[target_feature(enable = "avx512f")]
35181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35183pub const fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
35184 unsafe {
35185 simd_reduce_min(simd_select_bitmask(
35186 m:k,
35187 yes:a.as_u32x16(),
35188 no:u32x16::splat(u32::MAX),
35189 ))
35190 }
35191}
35192
35193/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
35194///
35195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
35196#[inline]
35197#[target_feature(enable = "avx512f")]
35198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35199#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35200pub const fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
35201 unsafe { simd_reduce_min(a.as_u64x8()) }
35202}
35203
35204/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35205///
35206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
35207#[inline]
35208#[target_feature(enable = "avx512f")]
35209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35210#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35211pub const fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
35212 unsafe { simd_reduce_min(simd_select_bitmask(m:k, yes:a.as_u64x8(), no:u64x8::splat(u64::MAX))) }
35213}
35214
35215/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
35216///
35217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
35218#[inline]
35219#[target_feature(enable = "avx512f")]
35220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35221pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
35222 unsafe {
35223 let a: __m256 = _mm256_min_ps(
35224 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
35225 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
35226 );
35227 let a: __m128 = _mm_min_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
35228 let a: __m128 = _mm_min_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
35229 _mm_cvtss_f32(_mm_min_ss(a, b:_mm_movehdup_ps(a)))
35230 }
35231}
35232
35233/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a.
35234///
35235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
35236#[inline]
35237#[target_feature(enable = "avx512f")]
35238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35239pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
35240 _mm512_reduce_min_ps(_mm512_mask_mov_ps(src:_mm512_set1_ps(f32::MAX), k, a))
35241}
35242
35243/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
35244///
35245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
35246#[inline]
35247#[target_feature(enable = "avx512f")]
35248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35249pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
35250 unsafe {
35251 let a: __m256d = _mm256_min_pd(
35252 a:_mm512_extractf64x4_pd::<0>(a),
35253 b:_mm512_extractf64x4_pd::<1>(a),
35254 );
35255 let a: __m128d = _mm_min_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
35256 _mm_cvtsd_f64(_mm_min_sd(a, b:simd_shuffle!(a, a, [1, 0])))
35257 }
35258}
35259
35260/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
35263#[inline]
35264#[target_feature(enable = "avx512f")]
35265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35266pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
35267 _mm512_reduce_min_pd(_mm512_mask_mov_pd(src:_mm512_set1_pd(f64::MAX), k, a))
35268}
35269
35270/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
35271///
35272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
35273#[inline]
35274#[target_feature(enable = "avx512f")]
35275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35277pub const fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
35278 unsafe { simd_reduce_and(a.as_i32x16()) }
35279}
35280
35281/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
35282///
35283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
35284#[inline]
35285#[target_feature(enable = "avx512f")]
35286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35287#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35288pub const fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
35289 unsafe { simd_reduce_and(simd_select_bitmask(m:k, yes:a.as_i32x16(), no:i32x16::splat(-1))) }
35290}
35291
35292/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
35293///
35294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
35295#[inline]
35296#[target_feature(enable = "avx512f")]
35297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35299pub const fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
35300 unsafe { simd_reduce_and(a.as_i64x8()) }
35301}
35302
35303/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
35304///
35305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
35306#[inline]
35307#[target_feature(enable = "avx512f")]
35308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35310pub const fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
35311 unsafe { simd_reduce_and(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::splat(-1))) }
35312}
35313
35314/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
35315///
35316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
35317#[inline]
35318#[target_feature(enable = "avx512f")]
35319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35320#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35321pub const fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
35322 unsafe { simd_reduce_or(a.as_i32x16()) }
35323}
35324
35325/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
35326///
35327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
35328#[inline]
35329#[target_feature(enable = "avx512f")]
35330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35332pub const fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
35333 unsafe { simd_reduce_or(simd_select_bitmask(m:k, yes:a.as_i32x16(), no:i32x16::ZERO)) }
35334}
35335
35336/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
35337///
35338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
35339#[inline]
35340#[target_feature(enable = "avx512f")]
35341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35343pub const fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
35344 unsafe { simd_reduce_or(a.as_i64x8()) }
35345}
35346
35347/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
35348///
35349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
35350#[inline]
35351#[target_feature(enable = "avx512f")]
35352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35353#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35354pub const fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
35355 unsafe { simd_reduce_or(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::ZERO)) }
35356}
35357
35358/// Returns vector of type `__m512d` with indeterminate elements.
35359/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35360/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35361/// In practice, this is typically equivalent to [`mem::zeroed`].
35362///
35363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
35364#[inline]
35365#[target_feature(enable = "avx512f")]
35366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35367// This intrinsic has no corresponding instruction.
35368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35369pub const fn _mm512_undefined_pd() -> __m512d {
35370 unsafe { const { mem::zeroed() } }
35371}
35372
35373/// Returns vector of type `__m512` with indeterminate elements.
35374/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35375/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35376/// In practice, this is typically equivalent to [`mem::zeroed`].
35377///
35378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
35379#[inline]
35380#[target_feature(enable = "avx512f")]
35381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35382// This intrinsic has no corresponding instruction.
35383#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35384pub const fn _mm512_undefined_ps() -> __m512 {
35385 unsafe { const { mem::zeroed() } }
35386}
35387
35388/// Return vector of type __m512i with indeterminate elements.
35389/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35390/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35391/// In practice, this is typically equivalent to [`mem::zeroed`].
35392///
35393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
35394#[inline]
35395#[target_feature(enable = "avx512f")]
35396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35397// This intrinsic has no corresponding instruction.
35398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35399pub const fn _mm512_undefined_epi32() -> __m512i {
35400 unsafe { const { mem::zeroed() } }
35401}
35402
35403/// Return vector of type __m512 with indeterminate elements.
35404/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35405/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35406/// In practice, this is typically equivalent to [`mem::zeroed`].
35407///
35408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
35409#[inline]
35410#[target_feature(enable = "avx512f")]
35411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35412// This intrinsic has no corresponding instruction.
35413#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35414pub const fn _mm512_undefined() -> __m512 {
35415 unsafe { const { mem::zeroed() } }
35416}
35417
35418/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35419///
35420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
35421#[inline]
35422#[target_feature(enable = "avx512f")]
35423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35424#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35426pub const unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
35427 ptr::read_unaligned(src:mem_addr as *const __m512i)
35428}
35429
35430/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35431///
35432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
35433#[inline]
35434#[target_feature(enable = "avx512f,avx512vl")]
35435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35436#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35438pub const unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
35439 ptr::read_unaligned(src:mem_addr as *const __m256i)
35440}
35441
35442/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35443///
35444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
35445#[inline]
35446#[target_feature(enable = "avx512f,avx512vl")]
35447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35448#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35449#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35450pub const unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
35451 ptr::read_unaligned(src:mem_addr as *const __m128i)
35452}
35453
35454/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35455///
35456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
35457#[inline]
35458#[target_feature(enable = "avx512f")]
35459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35460#[cfg_attr(test, assert_instr(vpmovdw))]
35461pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
35462 vpmovdwmem(mem_addr.cast(), a.as_i32x16(), mask:k);
35463}
35464
35465/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35466///
35467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
35468#[inline]
35469#[target_feature(enable = "avx512f,avx512vl")]
35470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35471#[cfg_attr(test, assert_instr(vpmovdw))]
35472pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35473 vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), mask:k);
35474}
35475
35476/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35477///
35478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
35479#[inline]
35480#[target_feature(enable = "avx512f,avx512vl")]
35481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35482#[cfg_attr(test, assert_instr(vpmovdw))]
35483pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35484 vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), mask:k);
35485}
35486
35487/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35488///
35489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
35490#[inline]
35491#[target_feature(enable = "avx512f")]
35492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35493#[cfg_attr(test, assert_instr(vpmovsdw))]
35494pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
35495 vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), mask:k);
35496}
35497
35498/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35499///
35500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
35501#[inline]
35502#[target_feature(enable = "avx512f,avx512vl")]
35503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35504#[cfg_attr(test, assert_instr(vpmovsdw))]
35505pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35506 vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), mask:k);
35507}
35508
35509/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35510///
35511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
35512#[inline]
35513#[target_feature(enable = "avx512f,avx512vl")]
35514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35515#[cfg_attr(test, assert_instr(vpmovsdw))]
35516pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35517 vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), mask:k);
35518}
35519
35520/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35521///
35522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
35523#[inline]
35524#[target_feature(enable = "avx512f")]
35525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35526#[cfg_attr(test, assert_instr(vpmovusdw))]
35527pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
35528 vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), mask:k);
35529}
35530
35531/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35532///
35533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
35534#[inline]
35535#[target_feature(enable = "avx512f,avx512vl")]
35536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35537#[cfg_attr(test, assert_instr(vpmovusdw))]
35538pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35539 vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), mask:k);
35540}
35541
35542/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35543///
35544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
35545#[inline]
35546#[target_feature(enable = "avx512f,avx512vl")]
35547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35548#[cfg_attr(test, assert_instr(vpmovusdw))]
35549pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35550 vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), mask:k);
35551}
35552
35553/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35554///
35555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
35556#[inline]
35557#[target_feature(enable = "avx512f")]
35558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35559#[cfg_attr(test, assert_instr(vpmovdb))]
35560pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
35561 vpmovdbmem(mem_addr, a.as_i32x16(), mask:k);
35562}
35563
35564/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35565///
35566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
35567#[inline]
35568#[target_feature(enable = "avx512f,avx512vl")]
35569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35570#[cfg_attr(test, assert_instr(vpmovdb))]
35571pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35572 vpmovdbmem256(mem_addr, a.as_i32x8(), mask:k);
35573}
35574
35575/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35576///
35577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
35578#[inline]
35579#[target_feature(enable = "avx512f,avx512vl")]
35580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35581#[cfg_attr(test, assert_instr(vpmovdb))]
35582pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35583 vpmovdbmem128(mem_addr, a.as_i32x4(), mask:k);
35584}
35585
35586/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35587///
35588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
35589#[inline]
35590#[target_feature(enable = "avx512f")]
35591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35592#[cfg_attr(test, assert_instr(vpmovsdb))]
35593pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
35594 vpmovsdbmem(mem_addr, a.as_i32x16(), mask:k);
35595}
35596
35597/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35598///
35599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
35600#[inline]
35601#[target_feature(enable = "avx512f,avx512vl")]
35602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35603#[cfg_attr(test, assert_instr(vpmovsdb))]
35604pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35605 vpmovsdbmem256(mem_addr, a.as_i32x8(), mask:k);
35606}
35607
35608/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35609///
35610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
35611#[inline]
35612#[target_feature(enable = "avx512f,avx512vl")]
35613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35614#[cfg_attr(test, assert_instr(vpmovsdb))]
35615pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35616 vpmovsdbmem128(mem_addr, a.as_i32x4(), mask:k);
35617}
35618
35619/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35620///
35621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
35622#[inline]
35623#[target_feature(enable = "avx512f")]
35624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35625#[cfg_attr(test, assert_instr(vpmovusdb))]
35626pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
35627 vpmovusdbmem(mem_addr, a.as_i32x16(), mask:k);
35628}
35629
35630/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35631///
35632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
35633#[inline]
35634#[target_feature(enable = "avx512f,avx512vl")]
35635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35636#[cfg_attr(test, assert_instr(vpmovusdb))]
35637pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35638 vpmovusdbmem256(mem_addr, a.as_i32x8(), mask:k);
35639}
35640
35641/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35642///
35643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
35644#[inline]
35645#[target_feature(enable = "avx512f,avx512vl")]
35646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35647#[cfg_attr(test, assert_instr(vpmovusdb))]
35648pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35649 vpmovusdbmem128(mem_addr, a.as_i32x4(), mask:k);
35650}
35651
35652/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35653///
35654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
35655#[inline]
35656#[target_feature(enable = "avx512f")]
35657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35658#[cfg_attr(test, assert_instr(vpmovqw))]
35659pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
35660 vpmovqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35661}
35662
35663/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35664///
35665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
35666#[inline]
35667#[target_feature(enable = "avx512f,avx512vl")]
35668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35669#[cfg_attr(test, assert_instr(vpmovqw))]
35670pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35671 vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35672}
35673
35674/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35675///
35676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
35677#[inline]
35678#[target_feature(enable = "avx512f,avx512vl")]
35679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35680#[cfg_attr(test, assert_instr(vpmovqw))]
35681pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35682 vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35683}
35684
35685/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35686///
35687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
35688#[inline]
35689#[target_feature(enable = "avx512f")]
35690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35691#[cfg_attr(test, assert_instr(vpmovsqw))]
35692pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
35693 vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35694}
35695
35696/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35697///
35698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
35699#[inline]
35700#[target_feature(enable = "avx512f,avx512vl")]
35701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35702#[cfg_attr(test, assert_instr(vpmovsqw))]
35703pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35704 vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35705}
35706
35707/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35708///
35709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
35710#[inline]
35711#[target_feature(enable = "avx512f,avx512vl")]
35712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35713#[cfg_attr(test, assert_instr(vpmovsqw))]
35714pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35715 vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35716}
35717
35718/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35719///
35720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
35721#[inline]
35722#[target_feature(enable = "avx512f")]
35723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35724#[cfg_attr(test, assert_instr(vpmovusqw))]
35725pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
35726 vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35727}
35728
35729/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35730///
35731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
35732#[inline]
35733#[target_feature(enable = "avx512f,avx512vl")]
35734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35735#[cfg_attr(test, assert_instr(vpmovusqw))]
35736pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35737 vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35738}
35739
35740/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35741///
35742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
35743#[inline]
35744#[target_feature(enable = "avx512f,avx512vl")]
35745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35746#[cfg_attr(test, assert_instr(vpmovusqw))]
35747pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35748 vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35749}
35750
35751/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35752///
35753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
35754#[inline]
35755#[target_feature(enable = "avx512f")]
35756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35757#[cfg_attr(test, assert_instr(vpmovqb))]
35758pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
35759 vpmovqbmem(mem_addr, a.as_i64x8(), mask:k);
35760}
35761
35762/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35763///
35764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
35765#[inline]
35766#[target_feature(enable = "avx512f,avx512vl")]
35767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35768#[cfg_attr(test, assert_instr(vpmovqb))]
35769pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35770 vpmovqbmem256(mem_addr, a.as_i64x4(), mask:k);
35771}
35772
35773/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35774///
35775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
35776#[inline]
35777#[target_feature(enable = "avx512f,avx512vl")]
35778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35779#[cfg_attr(test, assert_instr(vpmovqb))]
35780pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35781 vpmovqbmem128(mem_addr, a.as_i64x2(), mask:k);
35782}
35783
35784/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35785///
35786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
35787#[inline]
35788#[target_feature(enable = "avx512f")]
35789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35790#[cfg_attr(test, assert_instr(vpmovsqb))]
35791pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
35792 vpmovsqbmem(mem_addr, a.as_i64x8(), mask:k);
35793}
35794
35795/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35796///
35797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
35798#[inline]
35799#[target_feature(enable = "avx512f,avx512vl")]
35800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35801#[cfg_attr(test, assert_instr(vpmovsqb))]
35802pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35803 vpmovsqbmem256(mem_addr, a.as_i64x4(), mask:k);
35804}
35805
35806/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35807///
35808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
35809#[inline]
35810#[target_feature(enable = "avx512f,avx512vl")]
35811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35812#[cfg_attr(test, assert_instr(vpmovsqb))]
35813pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35814 vpmovsqbmem128(mem_addr, a.as_i64x2(), mask:k);
35815}
35816
35817/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35818///
35819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
35820#[inline]
35821#[target_feature(enable = "avx512f")]
35822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35823#[cfg_attr(test, assert_instr(vpmovusqb))]
35824pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
35825 vpmovusqbmem(mem_addr, a.as_i64x8(), mask:k);
35826}
35827
35828/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
35831#[inline]
35832#[target_feature(enable = "avx512f,avx512vl")]
35833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35834#[cfg_attr(test, assert_instr(vpmovusqb))]
35835pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35836 vpmovusqbmem256(mem_addr, a.as_i64x4(), mask:k);
35837}
35838
35839/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35845#[cfg_attr(test, assert_instr(vpmovusqb))]
35846pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35847 vpmovusqbmem128(mem_addr, a.as_i64x2(), mask:k);
35848}
35849
35850///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35851///
35852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
35853#[inline]
35854#[target_feature(enable = "avx512f")]
35855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35856#[cfg_attr(test, assert_instr(vpmovqd))]
35857pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
35858 vpmovqdmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35859}
35860
35861///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35862///
35863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
35864#[inline]
35865#[target_feature(enable = "avx512f,avx512vl")]
35866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35867#[cfg_attr(test, assert_instr(vpmovqd))]
35868pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
35869 vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35870}
35871
35872///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35873///
35874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
35875#[inline]
35876#[target_feature(enable = "avx512f,avx512vl")]
35877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35878#[cfg_attr(test, assert_instr(vpmovqd))]
35879pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
35880 vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35881}
35882
35883/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35884///
35885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
35886#[inline]
35887#[target_feature(enable = "avx512f")]
35888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35889#[cfg_attr(test, assert_instr(vpmovsqd))]
35890pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
35891 vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35892}
35893
35894/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35895///
35896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
35897#[inline]
35898#[target_feature(enable = "avx512f,avx512vl")]
35899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35900#[cfg_attr(test, assert_instr(vpmovsqd))]
35901pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
35902 vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35903}
35904
35905/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35906///
35907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
35908#[inline]
35909#[target_feature(enable = "avx512f,avx512vl")]
35910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35911#[cfg_attr(test, assert_instr(vpmovsqd))]
35912pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
35913 vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35914}
35915
35916/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35917///
35918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
35919#[inline]
35920#[target_feature(enable = "avx512f")]
35921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35922#[cfg_attr(test, assert_instr(vpmovusqd))]
35923pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
35924 vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35925}
35926
35927/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35928///
35929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
35930#[inline]
35931#[target_feature(enable = "avx512f,avx512vl")]
35932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35933#[cfg_attr(test, assert_instr(vpmovusqd))]
35934pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
35935 vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35936}
35937
35938/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35939///
35940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
35941#[inline]
35942#[target_feature(enable = "avx512f,avx512vl")]
35943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35944#[cfg_attr(test, assert_instr(vpmovusqd))]
35945pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
35946 vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35947}
35948
35949/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35950///
35951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
35952#[inline]
35953#[target_feature(enable = "avx512f")]
35954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35955#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35956#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35957pub const unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
35958 ptr::write_unaligned(dst:mem_addr as *mut __m512i, src:a);
35959}
35960
35961/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35962///
35963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
35964#[inline]
35965#[target_feature(enable = "avx512f,avx512vl")]
35966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35967#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35969pub const unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
35970 ptr::write_unaligned(dst:mem_addr as *mut __m256i, src:a);
35971}
35972
35973/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35974///
35975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
35976#[inline]
35977#[target_feature(enable = "avx512f,avx512vl")]
35978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35979#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35981pub const unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
35982 ptr::write_unaligned(dst:mem_addr as *mut __m128i, src:a);
35983}
35984
35985/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35986///
35987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
35988#[inline]
35989#[target_feature(enable = "avx512f")]
35990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35991#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
35992#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35993pub const unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
35994 ptr::read_unaligned(src:mem_addr as *const __m512i)
35995}
35996
35997/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35998///
35999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
36000#[inline]
36001#[target_feature(enable = "avx512f,avx512vl")]
36002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36003#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36004#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36005pub const unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
36006 ptr::read_unaligned(src:mem_addr as *const __m256i)
36007}
36008
36009/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
36010///
36011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
36012#[inline]
36013#[target_feature(enable = "avx512f,avx512vl")]
36014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36015#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36017pub const unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
36018 ptr::read_unaligned(src:mem_addr as *const __m128i)
36019}
36020
36021/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36022///
36023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
36024#[inline]
36025#[target_feature(enable = "avx512f")]
36026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36027#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36029pub const unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
36030 ptr::write_unaligned(dst:mem_addr as *mut __m512i, src:a);
36031}
36032
36033/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36034///
36035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
36036#[inline]
36037#[target_feature(enable = "avx512f,avx512vl")]
36038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36039#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36040#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36041pub const unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
36042 ptr::write_unaligned(dst:mem_addr as *mut __m256i, src:a);
36043}
36044
36045/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36046///
36047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
36048#[inline]
36049#[target_feature(enable = "avx512f,avx512vl")]
36050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36051#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36052#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36053pub const unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
36054 ptr::write_unaligned(dst:mem_addr as *mut __m128i, src:a);
36055}
36056
36057/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
36058///
36059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
36060#[inline]
36061#[target_feature(enable = "avx512f")]
36062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36063#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
36064#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36065pub const unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
36066 ptr::read_unaligned(src:mem_addr)
36067}
36068
36069/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
36070///
36071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
36072#[inline]
36073#[target_feature(enable = "avx512f")]
36074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36075#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
36076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36077pub const unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
36078 ptr::write_unaligned(dst:mem_addr, src:a);
36079}
36080
36081/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
36082/// floating-point elements) from memory into result.
36083/// `mem_addr` does not need to be aligned on any particular boundary.
36084///
36085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
36086#[inline]
36087#[target_feature(enable = "avx512f")]
36088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36089#[cfg_attr(test, assert_instr(vmovups))]
36090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36091pub const unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
36092 ptr::read_unaligned(src:mem_addr as *const __m512d)
36093}
36094
36095/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
36096/// floating-point elements) from `a` into memory.
36097/// `mem_addr` does not need to be aligned on any particular boundary.
36098///
36099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
36100#[inline]
36101#[target_feature(enable = "avx512f")]
36102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36103#[cfg_attr(test, assert_instr(vmovups))]
36104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36105pub const unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
36106 ptr::write_unaligned(dst:mem_addr as *mut __m512d, src:a);
36107}
36108
36109/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
36110/// floating-point elements) from memory into result.
36111/// `mem_addr` does not need to be aligned on any particular boundary.
36112///
36113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
36114#[inline]
36115#[target_feature(enable = "avx512f")]
36116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36117#[cfg_attr(test, assert_instr(vmovups))]
36118#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36119pub const unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
36120 ptr::read_unaligned(src:mem_addr as *const __m512)
36121}
36122
36123/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
36124/// floating-point elements) from `a` into memory.
36125/// `mem_addr` does not need to be aligned on any particular boundary.
36126///
36127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
36128#[inline]
36129#[target_feature(enable = "avx512f")]
36130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36131#[cfg_attr(test, assert_instr(vmovups))]
36132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36133pub const unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
36134 ptr::write_unaligned(dst:mem_addr as *mut __m512, src:a);
36135}
36136
36137/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36138///
36139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
36140#[inline]
36141#[target_feature(enable = "avx512f")]
36142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36143#[cfg_attr(
36144 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36145 assert_instr(vmovaps)
36146)] //should be vmovdqa32
36147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36148pub const unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
36149 ptr::read(src:mem_addr)
36150}
36151
36152/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36153///
36154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
36155#[inline]
36156#[target_feature(enable = "avx512f")]
36157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36158#[cfg_attr(
36159 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36160 assert_instr(vmovaps)
36161)] //should be vmovdqa32
36162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36163pub const unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
36164 ptr::write(dst:mem_addr, src:a);
36165}
36166
36167/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36168///
36169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
36170#[inline]
36171#[target_feature(enable = "avx512f")]
36172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36173#[cfg_attr(
36174 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36175 assert_instr(vmovaps)
36176)] //should be vmovdqa32
36177#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36178pub const unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
36179 ptr::read(src:mem_addr as *const __m512i)
36180}
36181
36182/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36183///
36184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
36185#[inline]
36186#[target_feature(enable = "avx512f,avx512vl")]
36187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36188#[cfg_attr(
36189 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36190 assert_instr(vmovaps)
36191)] //should be vmovdqa32
36192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36193pub const unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
36194 ptr::read(src:mem_addr as *const __m256i)
36195}
36196
36197/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36198///
36199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
36200#[inline]
36201#[target_feature(enable = "avx512f,avx512vl")]
36202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36203#[cfg_attr(
36204 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36205 assert_instr(vmovaps)
36206)] //should be vmovdqa32
36207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36208pub const unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
36209 ptr::read(src:mem_addr as *const __m128i)
36210}
36211
36212/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36213///
36214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
36215#[inline]
36216#[target_feature(enable = "avx512f")]
36217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36218#[cfg_attr(
36219 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36220 assert_instr(vmovaps)
36221)] //should be vmovdqa32
36222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36223pub const unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
36224 ptr::write(dst:mem_addr as *mut __m512i, src:a);
36225}
36226
36227/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36228///
36229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
36230#[inline]
36231#[target_feature(enable = "avx512f,avx512vl")]
36232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36233#[cfg_attr(
36234 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36235 assert_instr(vmovaps)
36236)] //should be vmovdqa32
36237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36238pub const unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
36239 ptr::write(dst:mem_addr as *mut __m256i, src:a);
36240}
36241
36242/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36243///
36244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
36245#[inline]
36246#[target_feature(enable = "avx512f,avx512vl")]
36247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36248#[cfg_attr(
36249 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36250 assert_instr(vmovaps)
36251)] //should be vmovdqa32
36252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36253pub const unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
36254 ptr::write(dst:mem_addr as *mut __m128i, src:a);
36255}
36256
36257/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36258///
36259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
36260#[inline]
36261#[target_feature(enable = "avx512f")]
36262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36263#[cfg_attr(
36264 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36265 assert_instr(vmovaps)
36266)] //should be vmovdqa64
36267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36268pub const unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
36269 ptr::read(src:mem_addr as *const __m512i)
36270}
36271
36272/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36273///
36274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
36275#[inline]
36276#[target_feature(enable = "avx512f,avx512vl")]
36277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36278#[cfg_attr(
36279 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36280 assert_instr(vmovaps)
36281)] //should be vmovdqa64
36282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36283pub const unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
36284 ptr::read(src:mem_addr as *const __m256i)
36285}
36286
36287/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36288///
36289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
36290#[inline]
36291#[target_feature(enable = "avx512f,avx512vl")]
36292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36293#[cfg_attr(
36294 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36295 assert_instr(vmovaps)
36296)] //should be vmovdqa64
36297#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36298pub const unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
36299 ptr::read(src:mem_addr as *const __m128i)
36300}
36301
36302/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36308#[cfg_attr(
36309 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36310 assert_instr(vmovaps)
36311)] //should be vmovdqa64
36312#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36313pub const unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
36314 ptr::write(dst:mem_addr as *mut __m512i, src:a);
36315}
36316
36317/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36318///
36319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
36320#[inline]
36321#[target_feature(enable = "avx512f,avx512vl")]
36322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36323#[cfg_attr(
36324 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36325 assert_instr(vmovaps)
36326)] //should be vmovdqa64
36327#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36328pub const unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
36329 ptr::write(dst:mem_addr as *mut __m256i, src:a);
36330}
36331
36332/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36333///
36334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
36335#[inline]
36336#[target_feature(enable = "avx512f,avx512vl")]
36337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36338#[cfg_attr(
36339 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36340 assert_instr(vmovaps)
36341)] //should be vmovdqa64
36342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36343pub const unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
36344 ptr::write(dst:mem_addr as *mut __m128i, src:a);
36345}
36346
36347/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36348///
36349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
36350#[inline]
36351#[target_feature(enable = "avx512f")]
36352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36353#[cfg_attr(
36354 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36355 assert_instr(vmovaps)
36356)]
36357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36358pub const unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
36359 ptr::read(src:mem_addr as *const __m512)
36360}
36361
36362/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36363///
36364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
36365#[inline]
36366#[target_feature(enable = "avx512f")]
36367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36368#[cfg_attr(
36369 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36370 assert_instr(vmovaps)
36371)]
36372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36373pub const unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
36374 ptr::write(dst:mem_addr as *mut __m512, src:a);
36375}
36376
36377/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36378///
36379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
36380#[inline]
36381#[target_feature(enable = "avx512f")]
36382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36383#[cfg_attr(
36384 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36385 assert_instr(vmovaps)
36386)] //should be vmovapd
36387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36388pub const unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
36389 ptr::read(src:mem_addr as *const __m512d)
36390}
36391
36392/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36393///
36394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
36395#[inline]
36396#[target_feature(enable = "avx512f")]
36397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36398#[cfg_attr(
36399 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36400 assert_instr(vmovaps)
36401)] //should be vmovapd
36402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36403pub const unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
36404 ptr::write(dst:mem_addr as *mut __m512d, src:a);
36405}
36406
36407/// Load packed 32-bit integers from memory into dst using writemask k
36408/// (elements are copied from src when the corresponding mask bit is not set).
36409/// mem_addr does not need to be aligned on any particular boundary.
36410///
36411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
36412#[inline]
36413#[target_feature(enable = "avx512f")]
36414#[cfg_attr(test, assert_instr(vmovdqu32))]
36415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36417pub const unsafe fn _mm512_mask_loadu_epi32(
36418 src: __m512i,
36419 k: __mmask16,
36420 mem_addr: *const i32,
36421) -> __m512i {
36422 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36423 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
36424}
36425
36426/// Load packed 32-bit integers from memory into dst using zeromask k
36427/// (elements are zeroed out when the corresponding mask bit is not set).
36428/// mem_addr does not need to be aligned on any particular boundary.
36429///
36430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
36431#[inline]
36432#[target_feature(enable = "avx512f")]
36433#[cfg_attr(test, assert_instr(vmovdqu32))]
36434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36436pub const unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
36437 _mm512_mask_loadu_epi32(src:_mm512_setzero_si512(), k, mem_addr)
36438}
36439
36440/// Load packed 64-bit integers from memory into dst using writemask k
36441/// (elements are copied from src when the corresponding mask bit is not set).
36442/// mem_addr does not need to be aligned on any particular boundary.
36443///
36444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
36445#[inline]
36446#[target_feature(enable = "avx512f")]
36447#[cfg_attr(test, assert_instr(vmovdqu64))]
36448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36449#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36450pub const unsafe fn _mm512_mask_loadu_epi64(
36451 src: __m512i,
36452 k: __mmask8,
36453 mem_addr: *const i64,
36454) -> __m512i {
36455 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36456 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
36457}
36458
36459/// Load packed 64-bit integers from memory into dst using zeromask k
36460/// (elements are zeroed out when the corresponding mask bit is not set).
36461/// mem_addr does not need to be aligned on any particular boundary.
36462///
36463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
36464#[inline]
36465#[target_feature(enable = "avx512f")]
36466#[cfg_attr(test, assert_instr(vmovdqu64))]
36467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36468#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36469pub const unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
36470 _mm512_mask_loadu_epi64(src:_mm512_setzero_si512(), k, mem_addr)
36471}
36472
36473/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36474/// (elements are copied from src when the corresponding mask bit is not set).
36475/// mem_addr does not need to be aligned on any particular boundary.
36476///
36477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
36478#[inline]
36479#[target_feature(enable = "avx512f")]
36480#[cfg_attr(test, assert_instr(vmovups))]
36481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36483pub const unsafe fn _mm512_mask_loadu_ps(
36484 src: __m512,
36485 k: __mmask16,
36486 mem_addr: *const f32,
36487) -> __m512 {
36488 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36489 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
36490}
36491
36492/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36493/// (elements are zeroed out when the corresponding mask bit is not set).
36494/// mem_addr does not need to be aligned on any particular boundary.
36495///
36496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
36497#[inline]
36498#[target_feature(enable = "avx512f")]
36499#[cfg_attr(test, assert_instr(vmovups))]
36500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36502pub const unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
36503 _mm512_mask_loadu_ps(src:_mm512_setzero_ps(), k, mem_addr)
36504}
36505
36506/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36507/// (elements are copied from src when the corresponding mask bit is not set).
36508/// mem_addr does not need to be aligned on any particular boundary.
36509///
36510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
36511#[inline]
36512#[target_feature(enable = "avx512f")]
36513#[cfg_attr(test, assert_instr(vmovupd))]
36514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36515#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36516pub const unsafe fn _mm512_mask_loadu_pd(
36517 src: __m512d,
36518 k: __mmask8,
36519 mem_addr: *const f64,
36520) -> __m512d {
36521 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36522 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
36523}
36524
36525/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36526/// (elements are zeroed out when the corresponding mask bit is not set).
36527/// mem_addr does not need to be aligned on any particular boundary.
36528///
36529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
36530#[inline]
36531#[target_feature(enable = "avx512f")]
36532#[cfg_attr(test, assert_instr(vmovupd))]
36533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36535pub const unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
36536 _mm512_mask_loadu_pd(src:_mm512_setzero_pd(), k, mem_addr)
36537}
36538
36539/// Load packed 32-bit integers from memory into dst using writemask k
36540/// (elements are copied from src when the corresponding mask bit is not set).
36541/// mem_addr does not need to be aligned on any particular boundary.
36542///
36543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
36544#[inline]
36545#[target_feature(enable = "avx512f,avx512vl")]
36546#[cfg_attr(test, assert_instr(vmovdqu32))]
36547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36549pub const unsafe fn _mm256_mask_loadu_epi32(
36550 src: __m256i,
36551 k: __mmask8,
36552 mem_addr: *const i32,
36553) -> __m256i {
36554 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36555 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
36556}
36557
36558/// Load packed 32-bit integers from memory into dst using zeromask k
36559/// (elements are zeroed out when the corresponding mask bit is not set).
36560/// mem_addr does not need to be aligned on any particular boundary.
36561///
36562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
36563#[inline]
36564#[target_feature(enable = "avx512f,avx512vl")]
36565#[cfg_attr(test, assert_instr(vmovdqu32))]
36566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36568pub const unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
36569 _mm256_mask_loadu_epi32(src:_mm256_setzero_si256(), k, mem_addr)
36570}
36571
36572/// Load packed 64-bit integers from memory into dst using writemask k
36573/// (elements are copied from src when the corresponding mask bit is not set).
36574/// mem_addr does not need to be aligned on any particular boundary.
36575///
36576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
36577#[inline]
36578#[target_feature(enable = "avx512f,avx512vl")]
36579#[cfg_attr(test, assert_instr(vmovdqu64))]
36580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36582pub const unsafe fn _mm256_mask_loadu_epi64(
36583 src: __m256i,
36584 k: __mmask8,
36585 mem_addr: *const i64,
36586) -> __m256i {
36587 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
36588 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
36589}
36590
36591/// Load packed 64-bit integers from memory into dst using zeromask k
36592/// (elements are zeroed out when the corresponding mask bit is not set).
36593/// mem_addr does not need to be aligned on any particular boundary.
36594///
36595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
36596#[inline]
36597#[target_feature(enable = "avx512f,avx512vl")]
36598#[cfg_attr(test, assert_instr(vmovdqu64))]
36599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36601pub const unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
36602 _mm256_mask_loadu_epi64(src:_mm256_setzero_si256(), k, mem_addr)
36603}
36604
36605/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36606/// (elements are copied from src when the corresponding mask bit is not set).
36607/// mem_addr does not need to be aligned on any particular boundary.
36608///
36609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
36610#[inline]
36611#[target_feature(enable = "avx512f,avx512vl")]
36612#[cfg_attr(test, assert_instr(vmovups))]
36613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36615pub const unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
36616 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36617 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
36618}
36619
36620/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36621/// (elements are zeroed out when the corresponding mask bit is not set).
36622/// mem_addr does not need to be aligned on any particular boundary.
36623///
36624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
36625#[inline]
36626#[target_feature(enable = "avx512f,avx512vl")]
36627#[cfg_attr(test, assert_instr(vmovups))]
36628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36630pub const unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
36631 _mm256_mask_loadu_ps(src:_mm256_setzero_ps(), k, mem_addr)
36632}
36633
36634/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36635/// (elements are copied from src when the corresponding mask bit is not set).
36636/// mem_addr does not need to be aligned on any particular boundary.
36637///
36638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
36639#[inline]
36640#[target_feature(enable = "avx512f,avx512vl")]
36641#[cfg_attr(test, assert_instr(vmovupd))]
36642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36643#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36644pub const unsafe fn _mm256_mask_loadu_pd(
36645 src: __m256d,
36646 k: __mmask8,
36647 mem_addr: *const f64,
36648) -> __m256d {
36649 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
36650 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
36651}
36652
36653/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36654/// (elements are zeroed out when the corresponding mask bit is not set).
36655/// mem_addr does not need to be aligned on any particular boundary.
36656///
36657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
36658#[inline]
36659#[target_feature(enable = "avx512f,avx512vl")]
36660#[cfg_attr(test, assert_instr(vmovupd))]
36661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36663pub const unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36664 _mm256_mask_loadu_pd(src:_mm256_setzero_pd(), k, mem_addr)
36665}
36666
36667/// Load packed 32-bit integers from memory into dst using writemask k
36668/// (elements are copied from src when the corresponding mask bit is not set).
36669/// mem_addr does not need to be aligned on any particular boundary.
36670///
36671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
36672#[inline]
36673#[target_feature(enable = "avx512f,avx512vl")]
36674#[cfg_attr(test, assert_instr(vmovdqu32))]
36675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36676#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36677pub const unsafe fn _mm_mask_loadu_epi32(
36678 src: __m128i,
36679 k: __mmask8,
36680 mem_addr: *const i32,
36681) -> __m128i {
36682 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
36683 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
36684}
36685
36686/// Load packed 32-bit integers from memory into dst using zeromask k
36687/// (elements are zeroed out when the corresponding mask bit is not set).
36688/// mem_addr does not need to be aligned on any particular boundary.
36689///
36690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
36691#[inline]
36692#[target_feature(enable = "avx512f,avx512vl")]
36693#[cfg_attr(test, assert_instr(vmovdqu32))]
36694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36696pub const unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
36697 _mm_mask_loadu_epi32(src:_mm_setzero_si128(), k, mem_addr)
36698}
36699
36700/// Load packed 64-bit integers from memory into dst using writemask k
36701/// (elements are copied from src when the corresponding mask bit is not set).
36702/// mem_addr does not need to be aligned on any particular boundary.
36703///
36704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
36705#[inline]
36706#[target_feature(enable = "avx512f,avx512vl")]
36707#[cfg_attr(test, assert_instr(vmovdqu64))]
36708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36710pub const unsafe fn _mm_mask_loadu_epi64(
36711 src: __m128i,
36712 k: __mmask8,
36713 mem_addr: *const i64,
36714) -> __m128i {
36715 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
36716 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
36717}
36718
36719/// Load packed 64-bit integers from memory into dst using zeromask k
36720/// (elements are zeroed out when the corresponding mask bit is not set).
36721/// mem_addr does not need to be aligned on any particular boundary.
36722///
36723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
36724#[inline]
36725#[target_feature(enable = "avx512f,avx512vl")]
36726#[cfg_attr(test, assert_instr(vmovdqu64))]
36727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36729pub const unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
36730 _mm_mask_loadu_epi64(src:_mm_setzero_si128(), k, mem_addr)
36731}
36732
36733/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36734/// (elements are copied from src when the corresponding mask bit is not set).
36735/// mem_addr does not need to be aligned on any particular boundary.
36736///
36737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
36738#[inline]
36739#[target_feature(enable = "avx512f,avx512vl")]
36740#[cfg_attr(test, assert_instr(vmovups))]
36741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36743pub const unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
36744 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
36745 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
36746}
36747
36748/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36749/// (elements are zeroed out when the corresponding mask bit is not set).
36750/// mem_addr does not need to be aligned on any particular boundary.
36751///
36752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
36753#[inline]
36754#[target_feature(enable = "avx512f,avx512vl")]
36755#[cfg_attr(test, assert_instr(vmovups))]
36756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36757#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36758pub const unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
36759 _mm_mask_loadu_ps(src:_mm_setzero_ps(), k, mem_addr)
36760}
36761
36762/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36763/// (elements are copied from src when the corresponding mask bit is not set).
36764/// mem_addr does not need to be aligned on any particular boundary.
36765///
36766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
36767#[inline]
36768#[target_feature(enable = "avx512f,avx512vl")]
36769#[cfg_attr(test, assert_instr(vmovupd))]
36770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36772pub const unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36773 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
36774 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
36775}
36776
36777/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36778/// (elements are zeroed out when the corresponding mask bit is not set).
36779/// mem_addr does not need to be aligned on any particular boundary.
36780///
36781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
36782#[inline]
36783#[target_feature(enable = "avx512f,avx512vl")]
36784#[cfg_attr(test, assert_instr(vmovupd))]
36785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36787pub const unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36788 _mm_mask_loadu_pd(src:_mm_setzero_pd(), k, mem_addr)
36789}
36790
36791/// Load packed 32-bit integers from memory into dst using writemask k
36792/// (elements are copied from src when the corresponding mask bit is not set).
36793/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36794///
36795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
36796#[inline]
36797#[target_feature(enable = "avx512f")]
36798#[cfg_attr(test, assert_instr(vmovdqa32))]
36799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36800#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36801pub const unsafe fn _mm512_mask_load_epi32(
36802 src: __m512i,
36803 k: __mmask16,
36804 mem_addr: *const i32,
36805) -> __m512i {
36806 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36807 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
36808}
36809
36810/// Load packed 32-bit integers from memory into dst using zeromask k
36811/// (elements are zeroed out when the corresponding mask bit is not set).
36812/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36813///
36814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
36815#[inline]
36816#[target_feature(enable = "avx512f")]
36817#[cfg_attr(test, assert_instr(vmovdqa32))]
36818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36819#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36820pub const unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
36821 _mm512_mask_load_epi32(src:_mm512_setzero_si512(), k, mem_addr)
36822}
36823
36824/// Load packed 64-bit integers from memory into dst using writemask k
36825/// (elements are copied from src when the corresponding mask bit is not set).
36826/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36827///
36828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
36829#[inline]
36830#[target_feature(enable = "avx512f")]
36831#[cfg_attr(test, assert_instr(vmovdqa64))]
36832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36833#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36834pub const unsafe fn _mm512_mask_load_epi64(
36835 src: __m512i,
36836 k: __mmask8,
36837 mem_addr: *const i64,
36838) -> __m512i {
36839 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36840 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
36841}
36842
36843/// Load packed 64-bit integers from memory into dst using zeromask k
36844/// (elements are zeroed out when the corresponding mask bit is not set).
36845/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36846///
36847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
36848#[inline]
36849#[target_feature(enable = "avx512f")]
36850#[cfg_attr(test, assert_instr(vmovdqa64))]
36851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36853pub const unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
36854 _mm512_mask_load_epi64(src:_mm512_setzero_si512(), k, mem_addr)
36855}
36856
36857/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36858/// (elements are copied from src when the corresponding mask bit is not set).
36859/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36860///
36861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
36862#[inline]
36863#[target_feature(enable = "avx512f")]
36864#[cfg_attr(test, assert_instr(vmovaps))]
36865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36867pub const unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
36868 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36869 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
36870}
36871
36872/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36873/// (elements are zeroed out when the corresponding mask bit is not set).
36874/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36875///
36876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
36877#[inline]
36878#[target_feature(enable = "avx512f")]
36879#[cfg_attr(test, assert_instr(vmovaps))]
36880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36881#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36882pub const unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
36883 _mm512_mask_load_ps(src:_mm512_setzero_ps(), k, mem_addr)
36884}
36885
36886/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36887/// (elements are copied from src when the corresponding mask bit is not set).
36888/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36889///
36890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
36891#[inline]
36892#[target_feature(enable = "avx512f")]
36893#[cfg_attr(test, assert_instr(vmovapd))]
36894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36896pub const unsafe fn _mm512_mask_load_pd(
36897 src: __m512d,
36898 k: __mmask8,
36899 mem_addr: *const f64,
36900) -> __m512d {
36901 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36902 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
36903}
36904
36905/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36906/// (elements are zeroed out when the corresponding mask bit is not set).
36907/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36908///
36909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
36910#[inline]
36911#[target_feature(enable = "avx512f")]
36912#[cfg_attr(test, assert_instr(vmovapd))]
36913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36915pub const unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
36916 _mm512_mask_load_pd(src:_mm512_setzero_pd(), k, mem_addr)
36917}
36918
36919/// Load packed 32-bit integers from memory into dst using writemask k
36920/// (elements are copied from src when the corresponding mask bit is not set).
36921/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36922///
36923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
36924#[inline]
36925#[target_feature(enable = "avx512f,avx512vl")]
36926#[cfg_attr(test, assert_instr(vmovdqa32))]
36927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36929pub const unsafe fn _mm256_mask_load_epi32(
36930 src: __m256i,
36931 k: __mmask8,
36932 mem_addr: *const i32,
36933) -> __m256i {
36934 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36935 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
36936}
36937
36938/// Load packed 32-bit integers from memory into dst using zeromask k
36939/// (elements are zeroed out when the corresponding mask bit is not set).
36940/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36941///
36942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
36943#[inline]
36944#[target_feature(enable = "avx512f,avx512vl")]
36945#[cfg_attr(test, assert_instr(vmovdqa32))]
36946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36947#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36948pub const unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
36949 _mm256_mask_load_epi32(src:_mm256_setzero_si256(), k, mem_addr)
36950}
36951
36952/// Load packed 64-bit integers from memory into dst using writemask k
36953/// (elements are copied from src when the corresponding mask bit is not set).
36954/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36955///
36956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
36957#[inline]
36958#[target_feature(enable = "avx512f,avx512vl")]
36959#[cfg_attr(test, assert_instr(vmovdqa64))]
36960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36962pub const unsafe fn _mm256_mask_load_epi64(
36963 src: __m256i,
36964 k: __mmask8,
36965 mem_addr: *const i64,
36966) -> __m256i {
36967 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
36968 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
36969}
36970
36971/// Load packed 64-bit integers from memory into dst using zeromask k
36972/// (elements are zeroed out when the corresponding mask bit is not set).
36973/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36974///
36975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
36976#[inline]
36977#[target_feature(enable = "avx512f,avx512vl")]
36978#[cfg_attr(test, assert_instr(vmovdqa64))]
36979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36981pub const unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
36982 _mm256_mask_load_epi64(src:_mm256_setzero_si256(), k, mem_addr)
36983}
36984
36985/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36986/// (elements are copied from src when the corresponding mask bit is not set).
36987/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36988///
36989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
36990#[inline]
36991#[target_feature(enable = "avx512f,avx512vl")]
36992#[cfg_attr(test, assert_instr(vmovaps))]
36993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36995pub const unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
36996 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36997 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
36998}
36999
37000/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
37001/// (elements are zeroed out when the corresponding mask bit is not set).
37002/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37003///
37004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
37005#[inline]
37006#[target_feature(enable = "avx512f,avx512vl")]
37007#[cfg_attr(test, assert_instr(vmovaps))]
37008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37010pub const unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
37011 _mm256_mask_load_ps(src:_mm256_setzero_ps(), k, mem_addr)
37012}
37013
37014/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
37015/// (elements are copied from src when the corresponding mask bit is not set).
37016/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37017///
37018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
37019#[inline]
37020#[target_feature(enable = "avx512f,avx512vl")]
37021#[cfg_attr(test, assert_instr(vmovapd))]
37022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37023#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37024pub const unsafe fn _mm256_mask_load_pd(
37025 src: __m256d,
37026 k: __mmask8,
37027 mem_addr: *const f64,
37028) -> __m256d {
37029 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
37030 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
37031}
37032
37033/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
37034/// (elements are zeroed out when the corresponding mask bit is not set).
37035/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37036///
37037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
37038#[inline]
37039#[target_feature(enable = "avx512f,avx512vl")]
37040#[cfg_attr(test, assert_instr(vmovapd))]
37041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37042#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37043pub const unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
37044 _mm256_mask_load_pd(src:_mm256_setzero_pd(), k, mem_addr)
37045}
37046
37047/// Load packed 32-bit integers from memory into dst using writemask k
37048/// (elements are copied from src when the corresponding mask bit is not set).
37049/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37050///
37051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
37052#[inline]
37053#[target_feature(enable = "avx512f,avx512vl")]
37054#[cfg_attr(test, assert_instr(vmovdqa32))]
37055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37057pub const unsafe fn _mm_mask_load_epi32(
37058 src: __m128i,
37059 k: __mmask8,
37060 mem_addr: *const i32,
37061) -> __m128i {
37062 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
37063 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
37064}
37065
37066/// Load packed 32-bit integers from memory into dst using zeromask k
37067/// (elements are zeroed out when the corresponding mask bit is not set).
37068/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37069///
37070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
37071#[inline]
37072#[target_feature(enable = "avx512f,avx512vl")]
37073#[cfg_attr(test, assert_instr(vmovdqa32))]
37074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37075#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37076pub const unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
37077 _mm_mask_load_epi32(src:_mm_setzero_si128(), k, mem_addr)
37078}
37079
37080/// Load packed 64-bit integers from memory into dst using writemask k
37081/// (elements are copied from src when the corresponding mask bit is not set).
37082/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37083///
37084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
37085#[inline]
37086#[target_feature(enable = "avx512f,avx512vl")]
37087#[cfg_attr(test, assert_instr(vmovdqa64))]
37088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37089#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37090pub const unsafe fn _mm_mask_load_epi64(
37091 src: __m128i,
37092 k: __mmask8,
37093 mem_addr: *const i64,
37094) -> __m128i {
37095 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
37096 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
37097}
37098
37099/// Load packed 64-bit integers from memory into dst using zeromask k
37100/// (elements are zeroed out when the corresponding mask bit is not set).
37101/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37102///
37103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
37104#[inline]
37105#[target_feature(enable = "avx512f,avx512vl")]
37106#[cfg_attr(test, assert_instr(vmovdqa64))]
37107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37109pub const unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
37110 _mm_mask_load_epi64(src:_mm_setzero_si128(), k, mem_addr)
37111}
37112
37113/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
37114/// (elements are copied from src when the corresponding mask bit is not set).
37115/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37116///
37117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
37118#[inline]
37119#[target_feature(enable = "avx512f,avx512vl")]
37120#[cfg_attr(test, assert_instr(vmovaps))]
37121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37123pub const unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
37124 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
37125 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
37126}
37127
37128/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
37129/// (elements are zeroed out when the corresponding mask bit is not set).
37130/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37131///
37132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
37133#[inline]
37134#[target_feature(enable = "avx512f,avx512vl")]
37135#[cfg_attr(test, assert_instr(vmovaps))]
37136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37138pub const unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
37139 _mm_mask_load_ps(src:_mm_setzero_ps(), k, mem_addr)
37140}
37141
37142/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
37143/// (elements are copied from src when the corresponding mask bit is not set).
37144/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37145///
37146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
37147#[inline]
37148#[target_feature(enable = "avx512f,avx512vl")]
37149#[cfg_attr(test, assert_instr(vmovapd))]
37150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37151#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37152pub const unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
37153 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
37154 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
37155}
37156
37157/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
37158/// (elements are zeroed out when the corresponding mask bit is not set).
37159/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37160///
37161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
37162#[inline]
37163#[target_feature(enable = "avx512f,avx512vl")]
37164#[cfg_attr(test, assert_instr(vmovapd))]
37165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37166#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37167pub const unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
37168 _mm_mask_load_pd(src:_mm_setzero_pd(), k, mem_addr)
37169}
37170
/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Implemented with inline asm. Start `dst` at `src` so that when mask
    // bit 0 is clear, the merge-masked `vmovss` leaves `src`'s lower element.
    let mut dst: __m128 = src;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovss {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37192
/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
    // `dst` needs no initial value: the `{z}` (zero-masking) modifier makes the
    // instruction write every lane, so `out` (not `inout`) is correct here.
    let mut dst: __m128;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovss {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37214
/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Implemented with inline asm. Start `dst` at `src` so that when mask
    // bit 0 is clear, the merge-masked `vmovsd` leaves `src`'s lower element.
    let mut dst: __m128d = src;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovsd {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37236
/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
/// may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    // `dst` needs no initial value: the `{z}` (zero-masking) modifier makes the
    // instruction write every lane, so `out` (not `inout`) is correct here.
    let mut dst: __m128d;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37258
37259/// Store packed 32-bit integers from a into memory using writemask k.
37260/// mem_addr does not need to be aligned on any particular boundary.
37261///
37262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
37263#[inline]
37264#[target_feature(enable = "avx512f")]
37265#[cfg_attr(test, assert_instr(vmovdqu32))]
37266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37268pub const unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
37269 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37270 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
37271}
37272
37273/// Store packed 64-bit integers from a into memory using writemask k.
37274/// mem_addr does not need to be aligned on any particular boundary.
37275///
37276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
37277#[inline]
37278#[target_feature(enable = "avx512f")]
37279#[cfg_attr(test, assert_instr(vmovdqu64))]
37280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37282pub const unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
37283 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37284 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
37285}
37286
37287/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37288/// mem_addr does not need to be aligned on any particular boundary.
37289///
37290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
37291#[inline]
37292#[target_feature(enable = "avx512f")]
37293#[cfg_attr(test, assert_instr(vmovups))]
37294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37296pub const unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
37297 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37298 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
37299}
37300
37301/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37302/// mem_addr does not need to be aligned on any particular boundary.
37303///
37304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
37305#[inline]
37306#[target_feature(enable = "avx512f")]
37307#[cfg_attr(test, assert_instr(vmovupd))]
37308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37310pub const unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
37311 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37312 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
37313}
37314
37315/// Store packed 32-bit integers from a into memory using writemask k.
37316/// mem_addr does not need to be aligned on any particular boundary.
37317///
37318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
37319#[inline]
37320#[target_feature(enable = "avx512f,avx512vl")]
37321#[cfg_attr(test, assert_instr(vmovdqu32))]
37322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37323#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37324pub const unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
37325 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37326 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
37327}
37328
37329/// Store packed 64-bit integers from a into memory using writemask k.
37330/// mem_addr does not need to be aligned on any particular boundary.
37331///
37332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
37333#[inline]
37334#[target_feature(enable = "avx512f,avx512vl")]
37335#[cfg_attr(test, assert_instr(vmovdqu64))]
37336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37337#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37338pub const unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
37339 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37340 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
37341}
37342
37343/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37344/// mem_addr does not need to be aligned on any particular boundary.
37345///
37346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
37347#[inline]
37348#[target_feature(enable = "avx512f,avx512vl")]
37349#[cfg_attr(test, assert_instr(vmovups))]
37350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37352pub const unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
37353 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37354 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
37355}
37356
37357/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37358/// mem_addr does not need to be aligned on any particular boundary.
37359///
37360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
37361#[inline]
37362#[target_feature(enable = "avx512f,avx512vl")]
37363#[cfg_attr(test, assert_instr(vmovupd))]
37364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37365#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37366pub const unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
37367 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37368 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
37369}
37370
37371/// Store packed 32-bit integers from a into memory using writemask k.
37372/// mem_addr does not need to be aligned on any particular boundary.
37373///
37374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
37375#[inline]
37376#[target_feature(enable = "avx512f,avx512vl")]
37377#[cfg_attr(test, assert_instr(vmovdqu32))]
37378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37380pub const unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
37381 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37382 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
37383}
37384
37385/// Store packed 64-bit integers from a into memory using writemask k.
37386/// mem_addr does not need to be aligned on any particular boundary.
37387///
37388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
37389#[inline]
37390#[target_feature(enable = "avx512f,avx512vl")]
37391#[cfg_attr(test, assert_instr(vmovdqu64))]
37392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37393#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37394pub const unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
37395 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37396 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
37397}
37398
37399/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37400/// mem_addr does not need to be aligned on any particular boundary.
37401///
37402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
37403#[inline]
37404#[target_feature(enable = "avx512f,avx512vl")]
37405#[cfg_attr(test, assert_instr(vmovups))]
37406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37407#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37408pub const unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
37409 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37410 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
37411}
37412
37413/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37414/// mem_addr does not need to be aligned on any particular boundary.
37415///
37416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
37417#[inline]
37418#[target_feature(enable = "avx512f,avx512vl")]
37419#[cfg_attr(test, assert_instr(vmovupd))]
37420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37421#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37422pub const unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
37423 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37424 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
37425}
37426
37427/// Store packed 32-bit integers from a into memory using writemask k.
37428/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37429///
37430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
37431#[inline]
37432#[target_feature(enable = "avx512f")]
37433#[cfg_attr(test, assert_instr(vmovdqa32))]
37434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37436pub const unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
37437 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37438 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
37439}
37440
37441/// Store packed 64-bit integers from a into memory using writemask k.
37442/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37443///
37444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
37445#[inline]
37446#[target_feature(enable = "avx512f")]
37447#[cfg_attr(test, assert_instr(vmovdqa64))]
37448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37449#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37450pub const unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
37451 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37452 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
37453}
37454
37455/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37456/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37457///
37458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
37459#[inline]
37460#[target_feature(enable = "avx512f")]
37461#[cfg_attr(test, assert_instr(vmovaps))]
37462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37464pub const unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
37465 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37466 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
37467}
37468
37469/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37470/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37471///
37472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
37473#[inline]
37474#[target_feature(enable = "avx512f")]
37475#[cfg_attr(test, assert_instr(vmovapd))]
37476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37478pub const unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
37479 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37480 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
37481}
37482
37483/// Store packed 32-bit integers from a into memory using writemask k.
37484/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37485///
37486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
37487#[inline]
37488#[target_feature(enable = "avx512f,avx512vl")]
37489#[cfg_attr(test, assert_instr(vmovdqa32))]
37490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37492pub const unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
37493 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37494 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
37495}
37496
37497/// Store packed 64-bit integers from a into memory using writemask k.
37498/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37499///
37500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
37501#[inline]
37502#[target_feature(enable = "avx512f,avx512vl")]
37503#[cfg_attr(test, assert_instr(vmovdqa64))]
37504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37506pub const unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
37507 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37508 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
37509}
37510
37511/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37512/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37513///
37514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
37515#[inline]
37516#[target_feature(enable = "avx512f,avx512vl")]
37517#[cfg_attr(test, assert_instr(vmovaps))]
37518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37520pub const unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
37521 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37522 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
37523}
37524
37525/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37526/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37527///
37528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
37529#[inline]
37530#[target_feature(enable = "avx512f,avx512vl")]
37531#[cfg_attr(test, assert_instr(vmovapd))]
37532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37534pub const unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
37535 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37536 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
37537}
37538
37539/// Store packed 32-bit integers from a into memory using writemask k.
37540/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37541///
37542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
37543#[inline]
37544#[target_feature(enable = "avx512f,avx512vl")]
37545#[cfg_attr(test, assert_instr(vmovdqa32))]
37546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37547#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37548pub const unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
37549 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37550 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
37551}
37552
37553/// Store packed 64-bit integers from a into memory using writemask k.
37554/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37555///
37556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
37557#[inline]
37558#[target_feature(enable = "avx512f,avx512vl")]
37559#[cfg_attr(test, assert_instr(vmovdqa64))]
37560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37562pub const unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
37563 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37564 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
37565}
37566
37567/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37568/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37569///
37570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
37571#[inline]
37572#[target_feature(enable = "avx512f,avx512vl")]
37573#[cfg_attr(test, assert_instr(vmovaps))]
37574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37575#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37576pub const unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
37577 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37578 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
37579}
37580
37581/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37582/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37583///
37584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
37585#[inline]
37586#[target_feature(enable = "avx512f,avx512vl")]
37587#[cfg_attr(test, assert_instr(vmovapd))]
37588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37590pub const unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
37591 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37592 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
37593}
37594
/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
    // Implemented with inline asm because there is no LLVM intrinsic for a
    // masked scalar store; `vmovss` with a `{k}` writemask only touches memory
    // when bit 0 of `k` is set.
    asm!(
        vps!("vmovss", "{{{k}}}, {a}"),
        // `vps!` presumably emits the memory operand from `{p}` — the pointer
        // register bound below; NOTE(review): confirm against the macro def.
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
37612
/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
    // Inline asm: `vmovsd` with a `{k}` writemask stores the low f64 of `a`
    // only when bit 0 of `k` is set; memory is untouched otherwise.
    asm!(
        vps!("vmovsd", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
37630
37631/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37632///
37633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
37634#[inline]
37635#[target_feature(enable = "avx512f")]
37636#[cfg_attr(test, assert_instr(vpexpandd))]
37637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37638pub unsafe fn _mm512_mask_expandloadu_epi32(
37639 src: __m512i,
37640 k: __mmask16,
37641 mem_addr: *const i32,
37642) -> __m512i {
37643 transmute(src:expandloadd_512(mem_addr, a:src.as_i32x16(), mask:k))
37644}
37645
37646/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37647///
37648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
37649#[inline]
37650#[target_feature(enable = "avx512f")]
37651#[cfg_attr(test, assert_instr(vpexpandd))]
37652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37653pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
37654 _mm512_mask_expandloadu_epi32(src:_mm512_setzero_si512(), k, mem_addr)
37655}
37656
37657/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37658///
37659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
37660#[inline]
37661#[target_feature(enable = "avx512f,avx512vl")]
37662#[cfg_attr(test, assert_instr(vpexpandd))]
37663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37664pub unsafe fn _mm256_mask_expandloadu_epi32(
37665 src: __m256i,
37666 k: __mmask8,
37667 mem_addr: *const i32,
37668) -> __m256i {
37669 transmute(src:expandloadd_256(mem_addr, a:src.as_i32x8(), mask:k))
37670}
37671
37672/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37673///
37674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
37675#[inline]
37676#[target_feature(enable = "avx512f,avx512vl")]
37677#[cfg_attr(test, assert_instr(vpexpandd))]
37678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37679pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
37680 _mm256_mask_expandloadu_epi32(src:_mm256_setzero_si256(), k, mem_addr)
37681}
37682
37683/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37684///
37685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
37686#[inline]
37687#[target_feature(enable = "avx512f,avx512vl")]
37688#[cfg_attr(test, assert_instr(vpexpandd))]
37689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37690pub unsafe fn _mm_mask_expandloadu_epi32(
37691 src: __m128i,
37692 k: __mmask8,
37693 mem_addr: *const i32,
37694) -> __m128i {
37695 transmute(src:expandloadd_128(mem_addr, a:src.as_i32x4(), mask:k))
37696}
37697
37698/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37699///
37700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
37701#[inline]
37702#[target_feature(enable = "avx512f,avx512vl")]
37703#[cfg_attr(test, assert_instr(vpexpandd))]
37704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37705pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
37706 _mm_mask_expandloadu_epi32(src:_mm_setzero_si128(), k, mem_addr)
37707}
37708
37709/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37710///
37711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
37712#[inline]
37713#[target_feature(enable = "avx512f")]
37714#[cfg_attr(test, assert_instr(vpexpandq))]
37715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37716pub unsafe fn _mm512_mask_expandloadu_epi64(
37717 src: __m512i,
37718 k: __mmask8,
37719 mem_addr: *const i64,
37720) -> __m512i {
37721 transmute(src:expandloadq_512(mem_addr, a:src.as_i64x8(), mask:k))
37722}
37723
37724/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37725///
37726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
37727#[inline]
37728#[target_feature(enable = "avx512f")]
37729#[cfg_attr(test, assert_instr(vpexpandq))]
37730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37731pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
37732 _mm512_mask_expandloadu_epi64(src:_mm512_setzero_si512(), k, mem_addr)
37733}
37734
37735/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37736///
37737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
37738#[inline]
37739#[target_feature(enable = "avx512f,avx512vl")]
37740#[cfg_attr(test, assert_instr(vpexpandq))]
37741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37742pub unsafe fn _mm256_mask_expandloadu_epi64(
37743 src: __m256i,
37744 k: __mmask8,
37745 mem_addr: *const i64,
37746) -> __m256i {
37747 transmute(src:expandloadq_256(mem_addr, a:src.as_i64x4(), mask:k))
37748}
37749
37750/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37751///
37752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
37753#[inline]
37754#[target_feature(enable = "avx512f,avx512vl")]
37755#[cfg_attr(test, assert_instr(vpexpandq))]
37756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37757pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
37758 _mm256_mask_expandloadu_epi64(src:_mm256_setzero_si256(), k, mem_addr)
37759}
37760
37761/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37762///
37763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
37764#[inline]
37765#[target_feature(enable = "avx512f,avx512vl")]
37766#[cfg_attr(test, assert_instr(vpexpandq))]
37767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37768pub unsafe fn _mm_mask_expandloadu_epi64(
37769 src: __m128i,
37770 k: __mmask8,
37771 mem_addr: *const i64,
37772) -> __m128i {
37773 transmute(src:expandloadq_128(mem_addr, a:src.as_i64x2(), mask:k))
37774}
37775
37776/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37777///
37778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
37779#[inline]
37780#[target_feature(enable = "avx512f,avx512vl")]
37781#[cfg_attr(test, assert_instr(vpexpandq))]
37782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37783pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
37784 _mm_mask_expandloadu_epi64(src:_mm_setzero_si128(), k, mem_addr)
37785}
37786
37787/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37788///
37789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
37790#[inline]
37791#[target_feature(enable = "avx512f")]
37792#[cfg_attr(test, assert_instr(vexpandps))]
37793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37794pub unsafe fn _mm512_mask_expandloadu_ps(
37795 src: __m512,
37796 k: __mmask16,
37797 mem_addr: *const f32,
37798) -> __m512 {
37799 transmute(src:expandloadps_512(mem_addr, a:src.as_f32x16(), mask:k))
37800}
37801
37802/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37803///
37804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
37805#[inline]
37806#[target_feature(enable = "avx512f")]
37807#[cfg_attr(test, assert_instr(vexpandps))]
37808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37809pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
37810 _mm512_mask_expandloadu_ps(src:_mm512_setzero_ps(), k, mem_addr)
37811}
37812
37813/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37814///
37815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
37816#[inline]
37817#[target_feature(enable = "avx512f,avx512vl")]
37818#[cfg_attr(test, assert_instr(vexpandps))]
37819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37820pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
37821 transmute(src:expandloadps_256(mem_addr, a:src.as_f32x8(), mask:k))
37822}
37823
37824/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37825///
37826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
37827#[inline]
37828#[target_feature(enable = "avx512f,avx512vl")]
37829#[cfg_attr(test, assert_instr(vexpandps))]
37830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37831pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
37832 _mm256_mask_expandloadu_ps(src:_mm256_setzero_ps(), k, mem_addr)
37833}
37834
37835/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37836///
37837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
37838#[inline]
37839#[target_feature(enable = "avx512f,avx512vl")]
37840#[cfg_attr(test, assert_instr(vexpandps))]
37841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37842pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
37843 transmute(src:expandloadps_128(mem_addr, a:src.as_f32x4(), mask:k))
37844}
37845
37846/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37847///
37848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
37849#[inline]
37850#[target_feature(enable = "avx512f,avx512vl")]
37851#[cfg_attr(test, assert_instr(vexpandps))]
37852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37853pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
37854 _mm_mask_expandloadu_ps(src:_mm_setzero_ps(), k, mem_addr)
37855}
37856
37857/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37858///
37859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
37860#[inline]
37861#[target_feature(enable = "avx512f")]
37862#[cfg_attr(test, assert_instr(vexpandpd))]
37863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37864pub unsafe fn _mm512_mask_expandloadu_pd(
37865 src: __m512d,
37866 k: __mmask8,
37867 mem_addr: *const f64,
37868) -> __m512d {
37869 transmute(src:expandloadpd_512(mem_addr, a:src.as_f64x8(), mask:k))
37870}
37871
37872/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37873///
37874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
37875#[inline]
37876#[target_feature(enable = "avx512f")]
37877#[cfg_attr(test, assert_instr(vexpandpd))]
37878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37879pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
37880 _mm512_mask_expandloadu_pd(src:_mm512_setzero_pd(), k, mem_addr)
37881}
37882
37883/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37884///
37885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
37886#[inline]
37887#[target_feature(enable = "avx512f,avx512vl")]
37888#[cfg_attr(test, assert_instr(vexpandpd))]
37889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37890pub unsafe fn _mm256_mask_expandloadu_pd(
37891 src: __m256d,
37892 k: __mmask8,
37893 mem_addr: *const f64,
37894) -> __m256d {
37895 transmute(src:expandloadpd_256(mem_addr, a:src.as_f64x4(), mask:k))
37896}
37897
37898/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37899///
37900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
37901#[inline]
37902#[target_feature(enable = "avx512f,avx512vl")]
37903#[cfg_attr(test, assert_instr(vexpandpd))]
37904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37905pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
37906 _mm256_mask_expandloadu_pd(src:_mm256_setzero_pd(), k, mem_addr)
37907}
37908
37909/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37910///
37911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
37912#[inline]
37913#[target_feature(enable = "avx512f,avx512vl")]
37914#[cfg_attr(test, assert_instr(vexpandpd))]
37915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37916pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
37917 transmute(src:expandloadpd_128(mem_addr, a:src.as_f64x2(), mask:k))
37918}
37919
37920/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37921///
37922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
37923#[inline]
37924#[target_feature(enable = "avx512f,avx512vl")]
37925#[cfg_attr(test, assert_instr(vexpandpd))]
37926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37927pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
37928 _mm_mask_expandloadu_pd(src:_mm_setzero_pd(), k, mem_addr)
37929}
37930
37931/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
37932///
37933/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
37934#[inline]
37935#[target_feature(enable = "avx512f")]
37936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37938pub const fn _mm512_setr_pd(
37939 e0: f64,
37940 e1: f64,
37941 e2: f64,
37942 e3: f64,
37943 e4: f64,
37944 e5: f64,
37945 e6: f64,
37946 e7: f64,
37947) -> __m512d {
37948 unsafe {
37949 let r: Simd = f64x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7);
37950 transmute(src:r)
37951 }
37952}
37953
37954/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
37955///
37956/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
37957#[inline]
37958#[target_feature(enable = "avx512f")]
37959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37961pub const fn _mm512_set_pd(
37962 e0: f64,
37963 e1: f64,
37964 e2: f64,
37965 e3: f64,
37966 e4: f64,
37967 e5: f64,
37968 e6: f64,
37969 e7: f64,
37970) -> __m512d {
37971 _mm512_setr_pd(e0:e7, e1:e6, e2:e5, e3:e4, e4:e3, e5:e2, e6:e1, e7:e0)
37972}
37973
37974/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37975///
37976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
37977#[inline]
37978#[target_feature(enable = "avx512f")]
37979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37980#[cfg_attr(test, assert_instr(vmovss))]
37981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37982pub const fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37983 unsafe {
37984 let extractsrc: f32 = simd_extract!(src, 0);
37985 let mut mov: f32 = extractsrc;
37986 if (k & 0b00000001) != 0 {
37987 mov = simd_extract!(b, 0);
37988 }
37989 simd_insert!(a, 0, mov)
37990 }
37991}
37992
37993/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37994///
37995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
37996#[inline]
37997#[target_feature(enable = "avx512f")]
37998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37999#[cfg_attr(test, assert_instr(vmovss))]
38000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38001pub const fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38002 unsafe {
38003 let mut mov: f32 = 0.;
38004 if (k & 0b00000001) != 0 {
38005 mov = simd_extract!(b, 0);
38006 }
38007 simd_insert!(a, 0, mov)
38008 }
38009}
38010
38011/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38012///
38013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
38014#[inline]
38015#[target_feature(enable = "avx512f")]
38016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38017#[cfg_attr(test, assert_instr(vmovsd))]
38018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38019pub const fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38020 unsafe {
38021 let extractsrc: f64 = simd_extract!(src, 0);
38022 let mut mov: f64 = extractsrc;
38023 if (k & 0b00000001) != 0 {
38024 mov = simd_extract!(b, 0);
38025 }
38026 simd_insert!(a, 0, mov)
38027 }
38028}
38029
38030/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38031///
38032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
38033#[inline]
38034#[target_feature(enable = "avx512f")]
38035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38036#[cfg_attr(test, assert_instr(vmovsd))]
38037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38038pub const fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38039 unsafe {
38040 let mut mov: f64 = 0.;
38041 if (k & 0b00000001) != 0 {
38042 mov = simd_extract!(b, 0);
38043 }
38044 simd_insert!(a, 0, mov)
38045 }
38046}
38047
38048/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38049///
38050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
38051#[inline]
38052#[target_feature(enable = "avx512f")]
38053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38054#[cfg_attr(test, assert_instr(vaddss))]
38055#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38056pub const fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38057 unsafe {
38058 let extractsrc: f32 = simd_extract!(src, 0);
38059 let mut add: f32 = extractsrc;
38060 if (k & 0b00000001) != 0 {
38061 let extracta: f32 = simd_extract!(a, 0);
38062 let extractb: f32 = simd_extract!(b, 0);
38063 add = extracta + extractb;
38064 }
38065 simd_insert!(a, 0, add)
38066 }
38067}
38068
38069/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38070///
38071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
38072#[inline]
38073#[target_feature(enable = "avx512f")]
38074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38075#[cfg_attr(test, assert_instr(vaddss))]
38076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38077pub const fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38078 unsafe {
38079 let mut add: f32 = 0.;
38080 if (k & 0b00000001) != 0 {
38081 let extracta: f32 = simd_extract!(a, 0);
38082 let extractb: f32 = simd_extract!(b, 0);
38083 add = extracta + extractb;
38084 }
38085 simd_insert!(a, 0, add)
38086 }
38087}
38088
38089/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38090///
38091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
38092#[inline]
38093#[target_feature(enable = "avx512f")]
38094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38095#[cfg_attr(test, assert_instr(vaddsd))]
38096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38097pub const fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38098 unsafe {
38099 let extractsrc: f64 = simd_extract!(src, 0);
38100 let mut add: f64 = extractsrc;
38101 if (k & 0b00000001) != 0 {
38102 let extracta: f64 = simd_extract!(a, 0);
38103 let extractb: f64 = simd_extract!(b, 0);
38104 add = extracta + extractb;
38105 }
38106 simd_insert!(a, 0, add)
38107 }
38108}
38109
38110/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38111///
38112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
38113#[inline]
38114#[target_feature(enable = "avx512f")]
38115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38116#[cfg_attr(test, assert_instr(vaddsd))]
38117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38118pub const fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38119 unsafe {
38120 let mut add: f64 = 0.;
38121 if (k & 0b00000001) != 0 {
38122 let extracta: f64 = simd_extract!(a, 0);
38123 let extractb: f64 = simd_extract!(b, 0);
38124 add = extracta + extractb;
38125 }
38126 simd_insert!(a, 0, add)
38127 }
38128}
38129
38130/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38131///
38132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
38133#[inline]
38134#[target_feature(enable = "avx512f")]
38135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38136#[cfg_attr(test, assert_instr(vsubss))]
38137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38138pub const fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38139 unsafe {
38140 let extractsrc: f32 = simd_extract!(src, 0);
38141 let mut add: f32 = extractsrc;
38142 if (k & 0b00000001) != 0 {
38143 let extracta: f32 = simd_extract!(a, 0);
38144 let extractb: f32 = simd_extract!(b, 0);
38145 add = extracta - extractb;
38146 }
38147 simd_insert!(a, 0, add)
38148 }
38149}
38150
38151/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38152///
38153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
38154#[inline]
38155#[target_feature(enable = "avx512f")]
38156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38157#[cfg_attr(test, assert_instr(vsubss))]
38158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38159pub const fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38160 unsafe {
38161 let mut add: f32 = 0.;
38162 if (k & 0b00000001) != 0 {
38163 let extracta: f32 = simd_extract!(a, 0);
38164 let extractb: f32 = simd_extract!(b, 0);
38165 add = extracta - extractb;
38166 }
38167 simd_insert!(a, 0, add)
38168 }
38169}
38170
38171/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38172///
38173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
38174#[inline]
38175#[target_feature(enable = "avx512f")]
38176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38177#[cfg_attr(test, assert_instr(vsubsd))]
38178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38179pub const fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38180 unsafe {
38181 let extractsrc: f64 = simd_extract!(src, 0);
38182 let mut add: f64 = extractsrc;
38183 if (k & 0b00000001) != 0 {
38184 let extracta: f64 = simd_extract!(a, 0);
38185 let extractb: f64 = simd_extract!(b, 0);
38186 add = extracta - extractb;
38187 }
38188 simd_insert!(a, 0, add)
38189 }
38190}
38191
38192/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38193///
38194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
38195#[inline]
38196#[target_feature(enable = "avx512f")]
38197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38198#[cfg_attr(test, assert_instr(vsubsd))]
38199#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38200pub const fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38201 unsafe {
38202 let mut add: f64 = 0.;
38203 if (k & 0b00000001) != 0 {
38204 let extracta: f64 = simd_extract!(a, 0);
38205 let extractb: f64 = simd_extract!(b, 0);
38206 add = extracta - extractb;
38207 }
38208 simd_insert!(a, 0, add)
38209 }
38210}
38211
38212/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38213///
38214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
38215#[inline]
38216#[target_feature(enable = "avx512f")]
38217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38218#[cfg_attr(test, assert_instr(vmulss))]
38219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38220pub const fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38221 unsafe {
38222 let extractsrc: f32 = simd_extract!(src, 0);
38223 let mut add: f32 = extractsrc;
38224 if (k & 0b00000001) != 0 {
38225 let extracta: f32 = simd_extract!(a, 0);
38226 let extractb: f32 = simd_extract!(b, 0);
38227 add = extracta * extractb;
38228 }
38229 simd_insert!(a, 0, add)
38230 }
38231}
38232
38233/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38234///
38235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
38236#[inline]
38237#[target_feature(enable = "avx512f")]
38238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38239#[cfg_attr(test, assert_instr(vmulss))]
38240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38241pub const fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38242 unsafe {
38243 let mut add: f32 = 0.;
38244 if (k & 0b00000001) != 0 {
38245 let extracta: f32 = simd_extract!(a, 0);
38246 let extractb: f32 = simd_extract!(b, 0);
38247 add = extracta * extractb;
38248 }
38249 simd_insert!(a, 0, add)
38250 }
38251}
38252
38253/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38254///
38255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
38256#[inline]
38257#[target_feature(enable = "avx512f")]
38258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38259#[cfg_attr(test, assert_instr(vmulsd))]
38260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38261pub const fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38262 unsafe {
38263 let extractsrc: f64 = simd_extract!(src, 0);
38264 let mut add: f64 = extractsrc;
38265 if (k & 0b00000001) != 0 {
38266 let extracta: f64 = simd_extract!(a, 0);
38267 let extractb: f64 = simd_extract!(b, 0);
38268 add = extracta * extractb;
38269 }
38270 simd_insert!(a, 0, add)
38271 }
38272}
38273
38274/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38275///
38276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
38277#[inline]
38278#[target_feature(enable = "avx512f")]
38279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38280#[cfg_attr(test, assert_instr(vmulsd))]
38281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38282pub const fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38283 unsafe {
38284 let mut add: f64 = 0.;
38285 if (k & 0b00000001) != 0 {
38286 let extracta: f64 = simd_extract!(a, 0);
38287 let extractb: f64 = simd_extract!(b, 0);
38288 add = extracta * extractb;
38289 }
38290 simd_insert!(a, 0, add)
38291 }
38292}
38293
38294/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38295///
38296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
38297#[inline]
38298#[target_feature(enable = "avx512f")]
38299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38300#[cfg_attr(test, assert_instr(vdivss))]
38301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38302pub const fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38303 unsafe {
38304 let extractsrc: f32 = simd_extract!(src, 0);
38305 let mut add: f32 = extractsrc;
38306 if (k & 0b00000001) != 0 {
38307 let extracta: f32 = simd_extract!(a, 0);
38308 let extractb: f32 = simd_extract!(b, 0);
38309 add = extracta / extractb;
38310 }
38311 simd_insert!(a, 0, add)
38312 }
38313}
38314
38315/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38316///
38317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
38318#[inline]
38319#[target_feature(enable = "avx512f")]
38320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38321#[cfg_attr(test, assert_instr(vdivss))]
38322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38323pub const fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38324 unsafe {
38325 let mut add: f32 = 0.;
38326 if (k & 0b00000001) != 0 {
38327 let extracta: f32 = simd_extract!(a, 0);
38328 let extractb: f32 = simd_extract!(b, 0);
38329 add = extracta / extractb;
38330 }
38331 simd_insert!(a, 0, add)
38332 }
38333}
38334
38335/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38336///
38337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
38338#[inline]
38339#[target_feature(enable = "avx512f")]
38340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38341#[cfg_attr(test, assert_instr(vdivsd))]
38342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38343pub const fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38344 unsafe {
38345 let extractsrc: f64 = simd_extract!(src, 0);
38346 let mut add: f64 = extractsrc;
38347 if (k & 0b00000001) != 0 {
38348 let extracta: f64 = simd_extract!(a, 0);
38349 let extractb: f64 = simd_extract!(b, 0);
38350 add = extracta / extractb;
38351 }
38352 simd_insert!(a, 0, add)
38353 }
38354}
38355
38356/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38357///
38358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
38359#[inline]
38360#[target_feature(enable = "avx512f")]
38361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38362#[cfg_attr(test, assert_instr(vdivsd))]
38363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38364pub const fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38365 unsafe {
38366 let mut add: f64 = 0.;
38367 if (k & 0b00000001) != 0 {
38368 let extracta: f64 = simd_extract!(a, 0);
38369 let extractb: f64 = simd_extract!(b, 0);
38370 add = extracta / extractb;
38371 }
38372 simd_insert!(a, 0, add)
38373 }
38374}
38375
38376/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38377///
38378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
38379#[inline]
38380#[target_feature(enable = "avx512f")]
38381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38382#[cfg_attr(test, assert_instr(vmaxss))]
38383pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38384 unsafe {
38385 transmute(src:vmaxss(
38386 a.as_f32x4(),
38387 b.as_f32x4(),
38388 src.as_f32x4(),
38389 mask:k,
38390 _MM_FROUND_CUR_DIRECTION,
38391 ))
38392 }
38393}
38394
38395/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38396///
38397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
38398#[inline]
38399#[target_feature(enable = "avx512f")]
38400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38401#[cfg_attr(test, assert_instr(vmaxss))]
38402pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38403 unsafe {
38404 transmute(src:vmaxss(
38405 a.as_f32x4(),
38406 b.as_f32x4(),
38407 src:f32x4::ZERO,
38408 mask:k,
38409 _MM_FROUND_CUR_DIRECTION,
38410 ))
38411 }
38412}
38413
38414/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38415///
38416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
38417#[inline]
38418#[target_feature(enable = "avx512f")]
38419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38420#[cfg_attr(test, assert_instr(vmaxsd))]
38421pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38422 unsafe {
38423 transmute(src:vmaxsd(
38424 a.as_f64x2(),
38425 b.as_f64x2(),
38426 src.as_f64x2(),
38427 mask:k,
38428 _MM_FROUND_CUR_DIRECTION,
38429 ))
38430 }
38431}
38432
38433/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38434///
38435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
38436#[inline]
38437#[target_feature(enable = "avx512f")]
38438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38439#[cfg_attr(test, assert_instr(vmaxsd))]
38440pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38441 unsafe {
38442 transmute(src:vmaxsd(
38443 a.as_f64x2(),
38444 b.as_f64x2(),
38445 src:f64x2::ZERO,
38446 mask:k,
38447 _MM_FROUND_CUR_DIRECTION,
38448 ))
38449 }
38450}
38451
38452/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38453///
38454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
38455#[inline]
38456#[target_feature(enable = "avx512f")]
38457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38458#[cfg_attr(test, assert_instr(vminss))]
38459pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38460 unsafe {
38461 transmute(src:vminss(
38462 a.as_f32x4(),
38463 b.as_f32x4(),
38464 src.as_f32x4(),
38465 mask:k,
38466 _MM_FROUND_CUR_DIRECTION,
38467 ))
38468 }
38469}
38470
38471/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38472///
38473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
38474#[inline]
38475#[target_feature(enable = "avx512f")]
38476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38477#[cfg_attr(test, assert_instr(vminss))]
38478pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38479 unsafe {
38480 transmute(src:vminss(
38481 a.as_f32x4(),
38482 b.as_f32x4(),
38483 src:f32x4::ZERO,
38484 mask:k,
38485 _MM_FROUND_CUR_DIRECTION,
38486 ))
38487 }
38488}
38489
38490/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38491///
38492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
38493#[inline]
38494#[target_feature(enable = "avx512f")]
38495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38496#[cfg_attr(test, assert_instr(vminsd))]
38497pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38498 unsafe {
38499 transmute(src:vminsd(
38500 a.as_f64x2(),
38501 b.as_f64x2(),
38502 src.as_f64x2(),
38503 mask:k,
38504 _MM_FROUND_CUR_DIRECTION,
38505 ))
38506 }
38507}
38508
38509/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38510///
38511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
38512#[inline]
38513#[target_feature(enable = "avx512f")]
38514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38515#[cfg_attr(test, assert_instr(vminsd))]
38516pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38517 unsafe {
38518 transmute(src:vminsd(
38519 a.as_f64x2(),
38520 b.as_f64x2(),
38521 src:f64x2::ZERO,
38522 mask:k,
38523 _MM_FROUND_CUR_DIRECTION,
38524 ))
38525 }
38526}
38527
38528/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38529///
38530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
38531#[inline]
38532#[target_feature(enable = "avx512f")]
38533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38534#[cfg_attr(test, assert_instr(vsqrtss))]
38535pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38536 unsafe { vsqrtss(a, b, src, mask:k, _MM_FROUND_CUR_DIRECTION) }
38537}
38538
38539/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38540///
38541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
38542#[inline]
38543#[target_feature(enable = "avx512f")]
38544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38545#[cfg_attr(test, assert_instr(vsqrtss))]
38546pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38547 unsafe { vsqrtss(a, b, src:_mm_setzero_ps(), mask:k, _MM_FROUND_CUR_DIRECTION) }
38548}
38549
38550/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38551///
38552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
38553#[inline]
38554#[target_feature(enable = "avx512f")]
38555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38556#[cfg_attr(test, assert_instr(vsqrtsd))]
38557pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38558 unsafe { vsqrtsd(a, b, src, mask:k, _MM_FROUND_CUR_DIRECTION) }
38559}
38560
38561/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38562///
38563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
38564#[inline]
38565#[target_feature(enable = "avx512f")]
38566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38567#[cfg_attr(test, assert_instr(vsqrtsd))]
38568pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38569 unsafe { vsqrtsd(a, b, src:_mm_setzero_pd(), mask:k, _MM_FROUND_CUR_DIRECTION) }
38570}
38571
38572/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38573///
38574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
38575#[inline]
38576#[target_feature(enable = "avx512f")]
38577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38578#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38579pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
38580 unsafe { transmute(src:vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:0b1)) }
38581}
38582
38583/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38584///
38585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
38586#[inline]
38587#[target_feature(enable = "avx512f")]
38588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38589#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38590pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38591 unsafe { transmute(src:vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), mask:k)) }
38592}
38593
38594/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38595///
38596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
38597#[inline]
38598#[target_feature(enable = "avx512f")]
38599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38600#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38601pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38602 unsafe { transmute(src:vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:k)) }
38603}
38604
38605/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38606///
38607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
38608#[inline]
38609#[target_feature(enable = "avx512f")]
38610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38611#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38612pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
38613 unsafe { transmute(src:vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:0b1)) }
38614}
38615
38616/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38617///
38618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
38619#[inline]
38620#[target_feature(enable = "avx512f")]
38621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38622#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38623pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38624 unsafe { transmute(src:vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), mask:k)) }
38625}
38626
38627/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38628///
38629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
38630#[inline]
38631#[target_feature(enable = "avx512f")]
38632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38633#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38634pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38635 unsafe { transmute(src:vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:k)) }
38636}
38637
38638/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38639///
38640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
38641#[inline]
38642#[target_feature(enable = "avx512f")]
38643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38644#[cfg_attr(test, assert_instr(vrcp14ss))]
38645pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
38646 unsafe { transmute(src:vrcp14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:0b1)) }
38647}
38648
38649/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38650///
38651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
38652#[inline]
38653#[target_feature(enable = "avx512f")]
38654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38655#[cfg_attr(test, assert_instr(vrcp14ss))]
38656pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38657 unsafe { transmute(src:vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), mask:k)) }
38658}
38659
38660/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38661///
38662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
38663#[inline]
38664#[target_feature(enable = "avx512f")]
38665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38666#[cfg_attr(test, assert_instr(vrcp14ss))]
38667pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38668 unsafe { transmute(src:vrcp14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:k)) }
38669}
38670
38671/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38672///
38673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
38674#[inline]
38675#[target_feature(enable = "avx512f")]
38676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38677#[cfg_attr(test, assert_instr(vrcp14sd))]
38678pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
38679 unsafe { transmute(src:vrcp14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:0b1)) }
38680}
38681
38682/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38683///
38684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
38685#[inline]
38686#[target_feature(enable = "avx512f")]
38687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38688#[cfg_attr(test, assert_instr(vrcp14sd))]
38689pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38690 unsafe { transmute(src:vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), mask:k)) }
38691}
38692
38693/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38694///
38695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
38696#[inline]
38697#[target_feature(enable = "avx512f")]
38698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38699#[cfg_attr(test, assert_instr(vrcp14sd))]
38700pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38701 unsafe { transmute(src:vrcp14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:k)) }
38702}
38703
38704/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38705///
38706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
38707#[inline]
38708#[target_feature(enable = "avx512f")]
38709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38710#[cfg_attr(test, assert_instr(vgetexpss))]
38711pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
38712 unsafe {
38713 transmute(src:vgetexpss(
38714 a.as_f32x4(),
38715 b.as_f32x4(),
38716 src:f32x4::ZERO,
38717 mask:0b1,
38718 _MM_FROUND_NO_EXC,
38719 ))
38720 }
38721}
38722
38723/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38724///
38725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
38726#[inline]
38727#[target_feature(enable = "avx512f")]
38728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38729#[cfg_attr(test, assert_instr(vgetexpss))]
38730pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38731 unsafe {
38732 transmute(src:vgetexpss(
38733 a.as_f32x4(),
38734 b.as_f32x4(),
38735 src.as_f32x4(),
38736 mask:k,
38737 _MM_FROUND_NO_EXC,
38738 ))
38739 }
38740}
38741
38742/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38743///
38744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
38745#[inline]
38746#[target_feature(enable = "avx512f")]
38747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38748#[cfg_attr(test, assert_instr(vgetexpss))]
38749pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38750 unsafe {
38751 transmute(src:vgetexpss(
38752 a.as_f32x4(),
38753 b.as_f32x4(),
38754 src:f32x4::ZERO,
38755 mask:k,
38756 _MM_FROUND_NO_EXC,
38757 ))
38758 }
38759}
38760
38761/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38762///
38763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
38764#[inline]
38765#[target_feature(enable = "avx512f")]
38766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38767#[cfg_attr(test, assert_instr(vgetexpsd))]
38768pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
38769 unsafe {
38770 transmute(src:vgetexpsd(
38771 a.as_f64x2(),
38772 b.as_f64x2(),
38773 src:f64x2::ZERO,
38774 mask:0b1,
38775 _MM_FROUND_NO_EXC,
38776 ))
38777 }
38778}
38779
38780/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38781///
38782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
38783#[inline]
38784#[target_feature(enable = "avx512f")]
38785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38786#[cfg_attr(test, assert_instr(vgetexpsd))]
38787pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38788 unsafe {
38789 transmute(src:vgetexpsd(
38790 a.as_f64x2(),
38791 b.as_f64x2(),
38792 src.as_f64x2(),
38793 mask:k,
38794 _MM_FROUND_NO_EXC,
38795 ))
38796 }
38797}
38798
38799/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38800///
38801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
38802#[inline]
38803#[target_feature(enable = "avx512f")]
38804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38805#[cfg_attr(test, assert_instr(vgetexpsd))]
38806pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38807 unsafe {
38808 transmute(src:vgetexpsd(
38809 a.as_f64x2(),
38810 b.as_f64x2(),
38811 src:f64x2::ZERO,
38812 mask:k,
38813 _MM_FROUND_NO_EXC,
38814 ))
38815 }
38816}
38817
38818/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38819/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38820/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38821/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38822/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38823/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38824/// The sign is determined by sc which can take the following values:\
38825/// _MM_MANT_SIGN_src // sign = sign(src)\
38826/// _MM_MANT_SIGN_zero // sign = 0\
38827/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38828/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38829///
38830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
38831#[inline]
38832#[target_feature(enable = "avx512f")]
38833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38834#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
38835#[rustc_legacy_const_generics(2, 3)]
38836pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
38837 a: __m128,
38838 b: __m128,
38839) -> __m128 {
38840 unsafe {
38841 static_assert_uimm_bits!(NORM, 4);
38842 static_assert_uimm_bits!(SIGN, 2);
38843 let a: Simd = a.as_f32x4();
38844 let b: Simd = b.as_f32x4();
38845 let r: Simd = vgetmantss(
38846 a,
38847 b,
38848 SIGN << 2 | NORM,
38849 src:f32x4::ZERO,
38850 m:0b1,
38851 _MM_FROUND_CUR_DIRECTION,
38852 );
38853 transmute(src:r)
38854 }
38855}
38856
38857/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38858/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38859/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38860/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38861/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38862/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38863/// The sign is determined by sc which can take the following values:\
38864/// _MM_MANT_SIGN_src // sign = sign(src)\
38865/// _MM_MANT_SIGN_zero // sign = 0\
38866/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38867/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38868///
38869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
38870#[inline]
38871#[target_feature(enable = "avx512f")]
38872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38873#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
38874#[rustc_legacy_const_generics(4, 5)]
38875pub fn _mm_mask_getmant_ss<
38876 const NORM: _MM_MANTISSA_NORM_ENUM,
38877 const SIGN: _MM_MANTISSA_SIGN_ENUM,
38878>(
38879 src: __m128,
38880 k: __mmask8,
38881 a: __m128,
38882 b: __m128,
38883) -> __m128 {
38884 unsafe {
38885 static_assert_uimm_bits!(NORM, 4);
38886 static_assert_uimm_bits!(SIGN, 2);
38887 let a: Simd = a.as_f32x4();
38888 let b: Simd = b.as_f32x4();
38889 let src: Simd = src.as_f32x4();
38890 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
38891 transmute(src:r)
38892 }
38893}
38894
38895/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38896/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38897/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38898/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38899/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38900/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38901/// The sign is determined by sc which can take the following values:\
38902/// _MM_MANT_SIGN_src // sign = sign(src)\
38903/// _MM_MANT_SIGN_zero // sign = 0\
38904/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38905/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38906///
38907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
38908#[inline]
38909#[target_feature(enable = "avx512f")]
38910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38911#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
38912#[rustc_legacy_const_generics(3, 4)]
38913pub fn _mm_maskz_getmant_ss<
38914 const NORM: _MM_MANTISSA_NORM_ENUM,
38915 const SIGN: _MM_MANTISSA_SIGN_ENUM,
38916>(
38917 k: __mmask8,
38918 a: __m128,
38919 b: __m128,
38920) -> __m128 {
38921 unsafe {
38922 static_assert_uimm_bits!(NORM, 4);
38923 static_assert_uimm_bits!(SIGN, 2);
38924 let a: Simd = a.as_f32x4();
38925 let b: Simd = b.as_f32x4();
38926 let r: Simd = vgetmantss(
38927 a,
38928 b,
38929 SIGN << 2 | NORM,
38930 src:f32x4::ZERO,
38931 m:k,
38932 _MM_FROUND_CUR_DIRECTION,
38933 );
38934 transmute(src:r)
38935 }
38936}
38937
38938/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38939/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38940/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38941/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38942/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38943/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38944/// The sign is determined by sc which can take the following values:\
38945/// _MM_MANT_SIGN_src // sign = sign(src)\
38946/// _MM_MANT_SIGN_zero // sign = 0\
38947/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38948/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38949///
38950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
38951#[inline]
38952#[target_feature(enable = "avx512f")]
38953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38954#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
38955#[rustc_legacy_const_generics(2, 3)]
38956pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
38957 a: __m128d,
38958 b: __m128d,
38959) -> __m128d {
38960 unsafe {
38961 static_assert_uimm_bits!(NORM, 4);
38962 static_assert_uimm_bits!(SIGN, 2);
38963 let a: Simd = a.as_f64x2();
38964 let b: Simd = b.as_f64x2();
38965 let r: Simd = vgetmantsd(
38966 a,
38967 b,
38968 SIGN << 2 | NORM,
38969 src:f64x2::ZERO,
38970 m:0b1,
38971 _MM_FROUND_CUR_DIRECTION,
38972 );
38973 transmute(src:r)
38974 }
38975}
38976
38977/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38978/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38979/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38980/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38981/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38982/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38983/// The sign is determined by sc which can take the following values:\
38984/// _MM_MANT_SIGN_src // sign = sign(src)\
38985/// _MM_MANT_SIGN_zero // sign = 0\
38986/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38987/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38988///
38989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
38990#[inline]
38991#[target_feature(enable = "avx512f")]
38992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38993#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
38994#[rustc_legacy_const_generics(4, 5)]
38995pub fn _mm_mask_getmant_sd<
38996 const NORM: _MM_MANTISSA_NORM_ENUM,
38997 const SIGN: _MM_MANTISSA_SIGN_ENUM,
38998>(
38999 src: __m128d,
39000 k: __mmask8,
39001 a: __m128d,
39002 b: __m128d,
39003) -> __m128d {
39004 unsafe {
39005 static_assert_uimm_bits!(NORM, 4);
39006 static_assert_uimm_bits!(SIGN, 2);
39007 let a: Simd = a.as_f64x2();
39008 let b: Simd = b.as_f64x2();
39009 let src: Simd = src.as_f64x2();
39010 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
39011 transmute(src:r)
39012 }
39013}
39014
39015/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39016/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39017/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39018/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39019/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39020/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39021/// The sign is determined by sc which can take the following values:\
39022/// _MM_MANT_SIGN_src // sign = sign(src)\
39023/// _MM_MANT_SIGN_zero // sign = 0\
39024/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39026///
39027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
39028#[inline]
39029#[target_feature(enable = "avx512f")]
39030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39031#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
39032#[rustc_legacy_const_generics(3, 4)]
39033pub fn _mm_maskz_getmant_sd<
39034 const NORM: _MM_MANTISSA_NORM_ENUM,
39035 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39036>(
39037 k: __mmask8,
39038 a: __m128d,
39039 b: __m128d,
39040) -> __m128d {
39041 unsafe {
39042 static_assert_uimm_bits!(NORM, 4);
39043 static_assert_uimm_bits!(SIGN, 2);
39044 let a: Simd = a.as_f64x2();
39045 let b: Simd = b.as_f64x2();
39046 let r: Simd = vgetmantsd(
39047 a,
39048 b,
39049 SIGN << 2 | NORM,
39050 src:f64x2::ZERO,
39051 m:k,
39052 _MM_FROUND_CUR_DIRECTION,
39053 );
39054 transmute(src:r)
39055 }
39056}
39057
39058/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39059/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39060/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39061/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39062/// * [`_MM_FROUND_TO_POS_INF`] : round up
39063/// * [`_MM_FROUND_TO_ZERO`] : truncate
39064/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39065///
39066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
39067#[inline]
39068#[target_feature(enable = "avx512f")]
39069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39070#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
39071#[rustc_legacy_const_generics(2)]
39072pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
39073 unsafe {
39074 static_assert_uimm_bits!(IMM8, 8);
39075 let a: Simd = a.as_f32x4();
39076 let b: Simd = b.as_f32x4();
39077 let r: Simd = vrndscaless(
39078 a,
39079 b,
39080 src:f32x4::ZERO,
39081 mask:0b11111111,
39082 IMM8,
39083 _MM_FROUND_CUR_DIRECTION,
39084 );
39085 transmute(src:r)
39086 }
39087}
39088
39089/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39090/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39091/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39092/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39093/// * [`_MM_FROUND_TO_POS_INF`] : round up
39094/// * [`_MM_FROUND_TO_ZERO`] : truncate
39095/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39096///
39097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
39098#[inline]
39099#[target_feature(enable = "avx512f")]
39100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39101#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
39102#[rustc_legacy_const_generics(4)]
39103pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
39104 src: __m128,
39105 k: __mmask8,
39106 a: __m128,
39107 b: __m128,
39108) -> __m128 {
39109 unsafe {
39110 static_assert_uimm_bits!(IMM8, 8);
39111 let a: Simd = a.as_f32x4();
39112 let b: Simd = b.as_f32x4();
39113 let src: Simd = src.as_f32x4();
39114 let r: Simd = vrndscaless(a, b, src, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39115 transmute(src:r)
39116 }
39117}
39118
39119/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39120/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39121/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39122/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39123/// * [`_MM_FROUND_TO_POS_INF`] : round up
39124/// * [`_MM_FROUND_TO_ZERO`] : truncate
39125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39126///
39127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
39128#[inline]
39129#[target_feature(enable = "avx512f")]
39130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39131#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
39132#[rustc_legacy_const_generics(3)]
39133pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39134 unsafe {
39135 static_assert_uimm_bits!(IMM8, 8);
39136 let a: Simd = a.as_f32x4();
39137 let b: Simd = b.as_f32x4();
39138 let r: Simd = vrndscaless(a, b, src:f32x4::ZERO, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39139 transmute(src:r)
39140 }
39141}
39142
39143/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39144/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39145/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39146/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39147/// * [`_MM_FROUND_TO_POS_INF`] : round up
39148/// * [`_MM_FROUND_TO_ZERO`] : truncate
39149/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39150///
39151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
39152#[inline]
39153#[target_feature(enable = "avx512f")]
39154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39155#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
39156#[rustc_legacy_const_generics(2)]
39157pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
39158 unsafe {
39159 static_assert_uimm_bits!(IMM8, 8);
39160 let a: Simd = a.as_f64x2();
39161 let b: Simd = b.as_f64x2();
39162 let r: Simd = vrndscalesd(
39163 a,
39164 b,
39165 src:f64x2::ZERO,
39166 mask:0b11111111,
39167 IMM8,
39168 _MM_FROUND_CUR_DIRECTION,
39169 );
39170 transmute(src:r)
39171 }
39172}
39173
39174/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39175/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39176/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39177/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39178/// * [`_MM_FROUND_TO_POS_INF`] : round up
39179/// * [`_MM_FROUND_TO_ZERO`] : truncate
39180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39181///
39182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
39183#[inline]
39184#[target_feature(enable = "avx512f")]
39185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39186#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
39187#[rustc_legacy_const_generics(4)]
39188pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
39189 src: __m128d,
39190 k: __mmask8,
39191 a: __m128d,
39192 b: __m128d,
39193) -> __m128d {
39194 unsafe {
39195 static_assert_uimm_bits!(IMM8, 8);
39196 let a: Simd = a.as_f64x2();
39197 let b: Simd = b.as_f64x2();
39198 let src: Simd = src.as_f64x2();
39199 let r: Simd = vrndscalesd(a, b, src, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39200 transmute(src:r)
39201 }
39202}
39203
39204/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39205/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39206/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39207/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39208/// * [`_MM_FROUND_TO_POS_INF`] : round up
39209/// * [`_MM_FROUND_TO_ZERO`] : truncate
39210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39211///
39212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
39213#[inline]
39214#[target_feature(enable = "avx512f")]
39215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39216#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
39217#[rustc_legacy_const_generics(3)]
39218pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39219 unsafe {
39220 static_assert_uimm_bits!(IMM8, 8);
39221 let a: Simd = a.as_f64x2();
39222 let b: Simd = b.as_f64x2();
39223 let r: Simd = vrndscalesd(a, b, src:f64x2::ZERO, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39224 transmute(src:r)
39225 }
39226}
39227
39228/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39229///
39230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
39231#[inline]
39232#[target_feature(enable = "avx512f")]
39233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39234#[cfg_attr(test, assert_instr(vscalefss))]
39235pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
39236 unsafe {
39237 let a: Simd = a.as_f32x4();
39238 let b: Simd = b.as_f32x4();
39239 transmute(src:vscalefss(
39240 a,
39241 b,
39242 src:f32x4::ZERO,
39243 mask:0b11111111,
39244 _MM_FROUND_CUR_DIRECTION,
39245 ))
39246 }
39247}
39248
39249/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39250///
39251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
39252#[inline]
39253#[target_feature(enable = "avx512f")]
39254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39255#[cfg_attr(test, assert_instr(vscalefss))]
39256pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
39257 unsafe {
39258 let a: Simd = a.as_f32x4();
39259 let b: Simd = b.as_f32x4();
39260 let src: Simd = src.as_f32x4();
39261 transmute(src:vscalefss(a, b, src, mask:k, _MM_FROUND_CUR_DIRECTION))
39262 }
39263}
39264
39265/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39266///
39267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
39268#[inline]
39269#[target_feature(enable = "avx512f")]
39270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39271#[cfg_attr(test, assert_instr(vscalefss))]
39272pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39273 unsafe {
39274 transmute(src:vscalefss(
39275 a.as_f32x4(),
39276 b.as_f32x4(),
39277 src:f32x4::ZERO,
39278 mask:k,
39279 _MM_FROUND_CUR_DIRECTION,
39280 ))
39281 }
39282}
39283
39284/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
39285///
39286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
39287#[inline]
39288#[target_feature(enable = "avx512f")]
39289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39290#[cfg_attr(test, assert_instr(vscalefsd))]
39291pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
39292 unsafe {
39293 transmute(src:vscalefsd(
39294 a.as_f64x2(),
39295 b.as_f64x2(),
39296 src:f64x2::ZERO,
39297 mask:0b11111111,
39298 _MM_FROUND_CUR_DIRECTION,
39299 ))
39300 }
39301}
39302
39303/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39304///
39305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
39306#[inline]
39307#[target_feature(enable = "avx512f")]
39308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39309#[cfg_attr(test, assert_instr(vscalefsd))]
39310pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39311 unsafe {
39312 transmute(src:vscalefsd(
39313 a.as_f64x2(),
39314 b.as_f64x2(),
39315 src.as_f64x2(),
39316 mask:k,
39317 _MM_FROUND_CUR_DIRECTION,
39318 ))
39319 }
39320}
39321
39322/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39323///
39324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
39325#[inline]
39326#[target_feature(enable = "avx512f")]
39327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39328#[cfg_attr(test, assert_instr(vscalefsd))]
39329pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39330 unsafe {
39331 transmute(src:vscalefsd(
39332 a.as_f64x2(),
39333 b.as_f64x2(),
39334 src:f64x2::ZERO,
39335 mask:k,
39336 _MM_FROUND_CUR_DIRECTION,
39337 ))
39338 }
39339}
39340
39341/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39342///
39343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
39344#[inline]
39345#[target_feature(enable = "avx512f")]
39346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39347#[cfg_attr(test, assert_instr(vfmadd))]
39348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39349pub const fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39350 unsafe {
39351 let mut fmadd: f32 = simd_extract!(a, 0);
39352 if (k & 0b00000001) != 0 {
39353 let extractb: f32 = simd_extract!(b, 0);
39354 let extractc: f32 = simd_extract!(c, 0);
39355 fmadd = fmaf32(a:fmadd, b:extractb, c:extractc);
39356 }
39357 simd_insert!(a, 0, fmadd)
39358 }
39359}
39360
39361/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39362///
39363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
39364#[inline]
39365#[target_feature(enable = "avx512f")]
39366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39367#[cfg_attr(test, assert_instr(vfmadd))]
39368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39369pub const fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39370 unsafe {
39371 let mut fmadd: f32 = 0.;
39372 if (k & 0b00000001) != 0 {
39373 let extracta: f32 = simd_extract!(a, 0);
39374 let extractb: f32 = simd_extract!(b, 0);
39375 let extractc: f32 = simd_extract!(c, 0);
39376 fmadd = fmaf32(a:extracta, b:extractb, c:extractc);
39377 }
39378 simd_insert!(a, 0, fmadd)
39379 }
39380}
39381
39382/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39383///
39384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
39385#[inline]
39386#[target_feature(enable = "avx512f")]
39387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39388#[cfg_attr(test, assert_instr(vfmadd))]
39389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39390pub const fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39391 unsafe {
39392 let mut fmadd: f32 = simd_extract!(c, 0);
39393 if (k & 0b00000001) != 0 {
39394 let extracta: f32 = simd_extract!(a, 0);
39395 let extractb: f32 = simd_extract!(b, 0);
39396 fmadd = fmaf32(a:extracta, b:extractb, c:fmadd);
39397 }
39398 simd_insert!(c, 0, fmadd)
39399 }
39400}
39401
39402/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39403///
39404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
39405#[inline]
39406#[target_feature(enable = "avx512f")]
39407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39408#[cfg_attr(test, assert_instr(vfmadd))]
39409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39410pub const fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39411 unsafe {
39412 let mut fmadd: f64 = simd_extract!(a, 0);
39413 if (k & 0b00000001) != 0 {
39414 let extractb: f64 = simd_extract!(b, 0);
39415 let extractc: f64 = simd_extract!(c, 0);
39416 fmadd = fmaf64(a:fmadd, b:extractb, c:extractc);
39417 }
39418 simd_insert!(a, 0, fmadd)
39419 }
39420}
39421
39422/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39423///
39424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
39425#[inline]
39426#[target_feature(enable = "avx512f")]
39427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39428#[cfg_attr(test, assert_instr(vfmadd))]
39429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39430pub const fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39431 unsafe {
39432 let mut fmadd: f64 = 0.;
39433 if (k & 0b00000001) != 0 {
39434 let extracta: f64 = simd_extract!(a, 0);
39435 let extractb: f64 = simd_extract!(b, 0);
39436 let extractc: f64 = simd_extract!(c, 0);
39437 fmadd = fmaf64(a:extracta, b:extractb, c:extractc);
39438 }
39439 simd_insert!(a, 0, fmadd)
39440 }
39441}
39442
39443/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39444///
39445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
39446#[inline]
39447#[target_feature(enable = "avx512f")]
39448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39449#[cfg_attr(test, assert_instr(vfmadd))]
39450#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39451pub const fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39452 unsafe {
39453 let mut fmadd: f64 = simd_extract!(c, 0);
39454 if (k & 0b00000001) != 0 {
39455 let extracta: f64 = simd_extract!(a, 0);
39456 let extractb: f64 = simd_extract!(b, 0);
39457 fmadd = fmaf64(a:extracta, b:extractb, c:fmadd);
39458 }
39459 simd_insert!(c, 0, fmadd)
39460 }
39461}
39462
39463/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39464///
39465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
39466#[inline]
39467#[target_feature(enable = "avx512f")]
39468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39469#[cfg_attr(test, assert_instr(vfmsub))]
39470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39471pub const fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39472 unsafe {
39473 let mut fmsub: f32 = simd_extract!(a, 0);
39474 if (k & 0b00000001) != 0 {
39475 let extractb: f32 = simd_extract!(b, 0);
39476 let extractc: f32 = simd_extract!(c, 0);
39477 let extractc: f32 = -extractc;
39478 fmsub = fmaf32(a:fmsub, b:extractb, c:extractc);
39479 }
39480 simd_insert!(a, 0, fmsub)
39481 }
39482}
39483
39484/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39485///
39486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
39487#[inline]
39488#[target_feature(enable = "avx512f")]
39489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39490#[cfg_attr(test, assert_instr(vfmsub))]
39491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39492pub const fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39493 unsafe {
39494 let mut fmsub: f32 = 0.;
39495 if (k & 0b00000001) != 0 {
39496 let extracta: f32 = simd_extract!(a, 0);
39497 let extractb: f32 = simd_extract!(b, 0);
39498 let extractc: f32 = simd_extract!(c, 0);
39499 let extractc: f32 = -extractc;
39500 fmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39501 }
39502 simd_insert!(a, 0, fmsub)
39503 }
39504}
39505
39506/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39507///
39508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
39509#[inline]
39510#[target_feature(enable = "avx512f")]
39511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39512#[cfg_attr(test, assert_instr(vfmsub))]
39513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39514pub const fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39515 unsafe {
39516 let mut fmsub: f32 = simd_extract!(c, 0);
39517 if (k & 0b00000001) != 0 {
39518 let extracta: f32 = simd_extract!(a, 0);
39519 let extractb: f32 = simd_extract!(b, 0);
39520 let extractc: f32 = -fmsub;
39521 fmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39522 }
39523 simd_insert!(c, 0, fmsub)
39524 }
39525}
39526
39527/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39528///
39529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
39530#[inline]
39531#[target_feature(enable = "avx512f")]
39532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39533#[cfg_attr(test, assert_instr(vfmsub))]
39534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39535pub const fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39536 unsafe {
39537 let mut fmsub: f64 = simd_extract!(a, 0);
39538 if (k & 0b00000001) != 0 {
39539 let extractb: f64 = simd_extract!(b, 0);
39540 let extractc: f64 = simd_extract!(c, 0);
39541 let extractc: f64 = -extractc;
39542 fmsub = fmaf64(a:fmsub, b:extractb, c:extractc);
39543 }
39544 simd_insert!(a, 0, fmsub)
39545 }
39546}
39547
39548/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39549///
39550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
39551#[inline]
39552#[target_feature(enable = "avx512f")]
39553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39554#[cfg_attr(test, assert_instr(vfmsub))]
39555#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39556pub const fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39557 unsafe {
39558 let mut fmsub: f64 = 0.;
39559 if (k & 0b00000001) != 0 {
39560 let extracta: f64 = simd_extract!(a, 0);
39561 let extractb: f64 = simd_extract!(b, 0);
39562 let extractc: f64 = simd_extract!(c, 0);
39563 let extractc: f64 = -extractc;
39564 fmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39565 }
39566 simd_insert!(a, 0, fmsub)
39567 }
39568}
39569
39570/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39571///
39572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
39573#[inline]
39574#[target_feature(enable = "avx512f")]
39575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39576#[cfg_attr(test, assert_instr(vfmsub))]
39577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39578pub const fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39579 unsafe {
39580 let mut fmsub: f64 = simd_extract!(c, 0);
39581 if (k & 0b00000001) != 0 {
39582 let extracta: f64 = simd_extract!(a, 0);
39583 let extractb: f64 = simd_extract!(b, 0);
39584 let extractc: f64 = -fmsub;
39585 fmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39586 }
39587 simd_insert!(c, 0, fmsub)
39588 }
39589}
39590
39591/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39592///
39593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
39594#[inline]
39595#[target_feature(enable = "avx512f")]
39596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39597#[cfg_attr(test, assert_instr(vfnmadd))]
39598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39599pub const fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39600 unsafe {
39601 let mut fnmadd: f32 = simd_extract!(a, 0);
39602 if (k & 0b00000001) != 0 {
39603 let extracta: f32 = -fnmadd;
39604 let extractb: f32 = simd_extract!(b, 0);
39605 let extractc: f32 = simd_extract!(c, 0);
39606 fnmadd = fmaf32(a:extracta, b:extractb, c:extractc);
39607 }
39608 simd_insert!(a, 0, fnmadd)
39609 }
39610}
39611
39612/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39613///
39614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
39615#[inline]
39616#[target_feature(enable = "avx512f")]
39617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39618#[cfg_attr(test, assert_instr(vfnmadd))]
39619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39620pub const fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39621 unsafe {
39622 let mut fnmadd: f32 = 0.;
39623 if (k & 0b00000001) != 0 {
39624 let extracta: f32 = simd_extract!(a, 0);
39625 let extracta: f32 = -extracta;
39626 let extractb: f32 = simd_extract!(b, 0);
39627 let extractc: f32 = simd_extract!(c, 0);
39628 fnmadd = fmaf32(a:extracta, b:extractb, c:extractc);
39629 }
39630 simd_insert!(a, 0, fnmadd)
39631 }
39632}
39633
39634/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39635///
39636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
39637#[inline]
39638#[target_feature(enable = "avx512f")]
39639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39640#[cfg_attr(test, assert_instr(vfnmadd))]
39641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39642pub const fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39643 unsafe {
39644 let mut fnmadd: f32 = simd_extract!(c, 0);
39645 if (k & 0b00000001) != 0 {
39646 let extracta: f32 = simd_extract!(a, 0);
39647 let extracta: f32 = -extracta;
39648 let extractb: f32 = simd_extract!(b, 0);
39649 fnmadd = fmaf32(a:extracta, b:extractb, c:fnmadd);
39650 }
39651 simd_insert!(c, 0, fnmadd)
39652 }
39653}
39654
39655/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39656///
39657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
39658#[inline]
39659#[target_feature(enable = "avx512f")]
39660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39661#[cfg_attr(test, assert_instr(vfnmadd))]
39662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39663pub const fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39664 unsafe {
39665 let mut fnmadd: f64 = simd_extract!(a, 0);
39666 if (k & 0b00000001) != 0 {
39667 let extracta: f64 = -fnmadd;
39668 let extractb: f64 = simd_extract!(b, 0);
39669 let extractc: f64 = simd_extract!(c, 0);
39670 fnmadd = fmaf64(a:extracta, b:extractb, c:extractc);
39671 }
39672 simd_insert!(a, 0, fnmadd)
39673 }
39674}
39675
39676/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39677///
39678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
39679#[inline]
39680#[target_feature(enable = "avx512f")]
39681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39682#[cfg_attr(test, assert_instr(vfnmadd))]
39683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39684pub const fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39685 unsafe {
39686 let mut fnmadd: f64 = 0.;
39687 if (k & 0b00000001) != 0 {
39688 let extracta: f64 = simd_extract!(a, 0);
39689 let extracta: f64 = -extracta;
39690 let extractb: f64 = simd_extract!(b, 0);
39691 let extractc: f64 = simd_extract!(c, 0);
39692 fnmadd = fmaf64(a:extracta, b:extractb, c:extractc);
39693 }
39694 simd_insert!(a, 0, fnmadd)
39695 }
39696}
39697
39698/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39699///
39700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
39701#[inline]
39702#[target_feature(enable = "avx512f")]
39703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39704#[cfg_attr(test, assert_instr(vfnmadd))]
39705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39706pub const fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39707 unsafe {
39708 let mut fnmadd: f64 = simd_extract!(c, 0);
39709 if (k & 0b00000001) != 0 {
39710 let extracta: f64 = simd_extract!(a, 0);
39711 let extracta: f64 = -extracta;
39712 let extractb: f64 = simd_extract!(b, 0);
39713 fnmadd = fmaf64(a:extracta, b:extractb, c:fnmadd);
39714 }
39715 simd_insert!(c, 0, fnmadd)
39716 }
39717}
39718
39719/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39720///
39721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
39722#[inline]
39723#[target_feature(enable = "avx512f")]
39724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39725#[cfg_attr(test, assert_instr(vfnmsub))]
39726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39727pub const fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39728 unsafe {
39729 let mut fnmsub: f32 = simd_extract!(a, 0);
39730 if (k & 0b00000001) != 0 {
39731 let extracta: f32 = -fnmsub;
39732 let extractb: f32 = simd_extract!(b, 0);
39733 let extractc: f32 = simd_extract!(c, 0);
39734 let extractc: f32 = -extractc;
39735 fnmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39736 }
39737 simd_insert!(a, 0, fnmsub)
39738 }
39739}
39740
39741/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39742///
39743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
39744#[inline]
39745#[target_feature(enable = "avx512f")]
39746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39747#[cfg_attr(test, assert_instr(vfnmsub))]
39748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39749pub const fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39750 unsafe {
39751 let mut fnmsub: f32 = 0.;
39752 if (k & 0b00000001) != 0 {
39753 let extracta: f32 = simd_extract!(a, 0);
39754 let extracta: f32 = -extracta;
39755 let extractb: f32 = simd_extract!(b, 0);
39756 let extractc: f32 = simd_extract!(c, 0);
39757 let extractc: f32 = -extractc;
39758 fnmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39759 }
39760 simd_insert!(a, 0, fnmsub)
39761 }
39762}
39763
39764/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39765///
39766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
39767#[inline]
39768#[target_feature(enable = "avx512f")]
39769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39770#[cfg_attr(test, assert_instr(vfnmsub))]
39771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39772pub const fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39773 unsafe {
39774 let mut fnmsub: f32 = simd_extract!(c, 0);
39775 if (k & 0b00000001) != 0 {
39776 let extracta: f32 = simd_extract!(a, 0);
39777 let extracta: f32 = -extracta;
39778 let extractb: f32 = simd_extract!(b, 0);
39779 let extractc: f32 = -fnmsub;
39780 fnmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39781 }
39782 simd_insert!(c, 0, fnmsub)
39783 }
39784}
39785
39786/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39787///
39788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
39789#[inline]
39790#[target_feature(enable = "avx512f")]
39791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39792#[cfg_attr(test, assert_instr(vfnmsub))]
39793#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39794pub const fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39795 unsafe {
39796 let mut fnmsub: f64 = simd_extract!(a, 0);
39797 if (k & 0b00000001) != 0 {
39798 let extracta: f64 = -fnmsub;
39799 let extractb: f64 = simd_extract!(b, 0);
39800 let extractc: f64 = simd_extract!(c, 0);
39801 let extractc: f64 = -extractc;
39802 fnmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39803 }
39804 simd_insert!(a, 0, fnmsub)
39805 }
39806}
39807
39808/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39809///
39810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
39811#[inline]
39812#[target_feature(enable = "avx512f")]
39813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39814#[cfg_attr(test, assert_instr(vfnmsub))]
39815#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39816pub const fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39817 unsafe {
39818 let mut fnmsub: f64 = 0.;
39819 if (k & 0b00000001) != 0 {
39820 let extracta: f64 = simd_extract!(a, 0);
39821 let extracta: f64 = -extracta;
39822 let extractb: f64 = simd_extract!(b, 0);
39823 let extractc: f64 = simd_extract!(c, 0);
39824 let extractc: f64 = -extractc;
39825 fnmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39826 }
39827 simd_insert!(a, 0, fnmsub)
39828 }
39829}
39830
39831/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39832///
39833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
39834#[inline]
39835#[target_feature(enable = "avx512f")]
39836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39837#[cfg_attr(test, assert_instr(vfnmsub))]
39838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39839pub const fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39840 unsafe {
39841 let mut fnmsub: f64 = simd_extract!(c, 0);
39842 if (k & 0b00000001) != 0 {
39843 let extracta: f64 = simd_extract!(a, 0);
39844 let extracta: f64 = -extracta;
39845 let extractb: f64 = simd_extract!(b, 0);
39846 let extractc: f64 = -fnmsub;
39847 fnmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39848 }
39849 simd_insert!(c, 0, fnmsub)
39850 }
39851}
39852
39853/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39854///
39855/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39856/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39857/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39858/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39859/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39860/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39861///
39862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
39863#[inline]
39864#[target_feature(enable = "avx512f")]
39865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39866#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39867#[rustc_legacy_const_generics(2)]
39868pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39869 unsafe {
39870 static_assert_rounding!(ROUNDING);
39871 let a: Simd = a.as_f32x4();
39872 let b: Simd = b.as_f32x4();
39873 let r: Simd = vaddss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
39874 transmute(src:r)
39875 }
39876}
39877
39878/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39879///
39880/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39881/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39882/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39883/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39884/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39885/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39886///
39887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
39888#[inline]
39889#[target_feature(enable = "avx512f")]
39890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39891#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39892#[rustc_legacy_const_generics(4)]
39893pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
39894 src: __m128,
39895 k: __mmask8,
39896 a: __m128,
39897 b: __m128,
39898) -> __m128 {
39899 unsafe {
39900 static_assert_rounding!(ROUNDING);
39901 let a: Simd = a.as_f32x4();
39902 let b: Simd = b.as_f32x4();
39903 let src: Simd = src.as_f32x4();
39904 let r: Simd = vaddss(a, b, src, mask:k, ROUNDING);
39905 transmute(src:r)
39906 }
39907}
39908
39909/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39910///
39911/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39912/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39913/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39914/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39915/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39917///
39918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
39919#[inline]
39920#[target_feature(enable = "avx512f")]
39921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39922#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39923#[rustc_legacy_const_generics(3)]
39924pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39925 unsafe {
39926 static_assert_rounding!(ROUNDING);
39927 let a: Simd = a.as_f32x4();
39928 let b: Simd = b.as_f32x4();
39929 let r: Simd = vaddss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
39930 transmute(src:r)
39931 }
39932}
39933
39934/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39935///
39936/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39937/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39938/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39939/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39940/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39942///
39943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
39944#[inline]
39945#[target_feature(enable = "avx512f")]
39946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39947#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
39948#[rustc_legacy_const_generics(2)]
39949pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39950 unsafe {
39951 static_assert_rounding!(ROUNDING);
39952 let a: Simd = a.as_f64x2();
39953 let b: Simd = b.as_f64x2();
39954 let r: Simd = vaddsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
39955 transmute(src:r)
39956 }
39957}
39958
39959/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39960///
39961/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39962/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39963/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39964/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39965/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39966/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39967///
39968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
39969#[inline]
39970#[target_feature(enable = "avx512f")]
39971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39972#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
39973#[rustc_legacy_const_generics(4)]
39974pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
39975 src: __m128d,
39976 k: __mmask8,
39977 a: __m128d,
39978 b: __m128d,
39979) -> __m128d {
39980 unsafe {
39981 static_assert_rounding!(ROUNDING);
39982 let a: Simd = a.as_f64x2();
39983 let b: Simd = b.as_f64x2();
39984 let src: Simd = src.as_f64x2();
39985 let r: Simd = vaddsd(a, b, src, mask:k, ROUNDING);
39986 transmute(src:r)
39987 }
39988}
39989
39990/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39991///
39992/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39993/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39994/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39995/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39996/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39997/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39998///
39999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
40000#[inline]
40001#[target_feature(enable = "avx512f")]
40002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40003#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
40004#[rustc_legacy_const_generics(3)]
40005pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40006 unsafe {
40007 static_assert_rounding!(ROUNDING);
40008 let a: Simd = a.as_f64x2();
40009 let b: Simd = b.as_f64x2();
40010 let r: Simd = vaddsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40011 transmute(src:r)
40012 }
40013}
40014
40015/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40016///
40017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40023///
40024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
40025#[inline]
40026#[target_feature(enable = "avx512f")]
40027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40028#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40029#[rustc_legacy_const_generics(2)]
40030pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40031 unsafe {
40032 static_assert_rounding!(ROUNDING);
40033 let a: Simd = a.as_f32x4();
40034 let b: Simd = b.as_f32x4();
40035 let r: Simd = vsubss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
40036 transmute(src:r)
40037 }
40038}
40039
40040/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40041///
40042/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40043/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40044/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40045/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40046/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40048///
40049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
40050#[inline]
40051#[target_feature(enable = "avx512f")]
40052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40053#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40054#[rustc_legacy_const_generics(4)]
40055pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
40056 src: __m128,
40057 k: __mmask8,
40058 a: __m128,
40059 b: __m128,
40060) -> __m128 {
40061 unsafe {
40062 static_assert_rounding!(ROUNDING);
40063 let a: Simd = a.as_f32x4();
40064 let b: Simd = b.as_f32x4();
40065 let src: Simd = src.as_f32x4();
40066 let r: Simd = vsubss(a, b, src, mask:k, ROUNDING);
40067 transmute(src:r)
40068 }
40069}
40070
40071/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40072///
40073/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40074/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40075/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40076/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40077/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40078/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40079///
40080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
40081#[inline]
40082#[target_feature(enable = "avx512f")]
40083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40084#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40085#[rustc_legacy_const_generics(3)]
40086pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40087 unsafe {
40088 static_assert_rounding!(ROUNDING);
40089 let a: Simd = a.as_f32x4();
40090 let b: Simd = b.as_f32x4();
40091 let r: Simd = vsubss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
40092 transmute(src:r)
40093 }
40094}
40095
40096/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40097///
40098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40104///
40105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
40106#[inline]
40107#[target_feature(enable = "avx512f")]
40108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40109#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40110#[rustc_legacy_const_generics(2)]
40111pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40112 unsafe {
40113 static_assert_rounding!(ROUNDING);
40114 let a: Simd = a.as_f64x2();
40115 let b: Simd = b.as_f64x2();
40116 let r: Simd = vsubsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
40117 transmute(src:r)
40118 }
40119}
40120
40121/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40122///
40123/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40124/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40125/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40126/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40127/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40128/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40129///
40130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
40131#[inline]
40132#[target_feature(enable = "avx512f")]
40133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40134#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40135#[rustc_legacy_const_generics(4)]
40136pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
40137 src: __m128d,
40138 k: __mmask8,
40139 a: __m128d,
40140 b: __m128d,
40141) -> __m128d {
40142 unsafe {
40143 static_assert_rounding!(ROUNDING);
40144 let a: Simd = a.as_f64x2();
40145 let b: Simd = b.as_f64x2();
40146 let src: Simd = src.as_f64x2();
40147 let r: Simd = vsubsd(a, b, src, mask:k, ROUNDING);
40148 transmute(src:r)
40149 }
40150}
40151
40152/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40153///
40154/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40155/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40156/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40157/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40158/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40160///
40161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
40162#[inline]
40163#[target_feature(enable = "avx512f")]
40164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40165#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40166#[rustc_legacy_const_generics(3)]
40167pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40168 unsafe {
40169 static_assert_rounding!(ROUNDING);
40170 let a: Simd = a.as_f64x2();
40171 let b: Simd = b.as_f64x2();
40172 let r: Simd = vsubsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40173 transmute(src:r)
40174 }
40175}
40176
40177/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40178///
40179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40185///
40186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
40187#[inline]
40188#[target_feature(enable = "avx512f")]
40189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40190#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40191#[rustc_legacy_const_generics(2)]
40192pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40193 unsafe {
40194 static_assert_rounding!(ROUNDING);
40195 let a: Simd = a.as_f32x4();
40196 let b: Simd = b.as_f32x4();
40197 let r: Simd = vmulss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
40198 transmute(src:r)
40199 }
40200}
40201
40202/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40203///
40204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40210///
40211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
40212#[inline]
40213#[target_feature(enable = "avx512f")]
40214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40215#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40216#[rustc_legacy_const_generics(4)]
40217pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
40218 src: __m128,
40219 k: __mmask8,
40220 a: __m128,
40221 b: __m128,
40222) -> __m128 {
40223 unsafe {
40224 static_assert_rounding!(ROUNDING);
40225 let a: Simd = a.as_f32x4();
40226 let b: Simd = b.as_f32x4();
40227 let src: Simd = src.as_f32x4();
40228 let r: Simd = vmulss(a, b, src, mask:k, ROUNDING);
40229 transmute(src:r)
40230 }
40231}
40232
40233/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40234///
40235/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40236/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40237/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40238/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40239/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40240/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40241///
40242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
40243#[inline]
40244#[target_feature(enable = "avx512f")]
40245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40246#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40247#[rustc_legacy_const_generics(3)]
40248pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40249 unsafe {
40250 static_assert_rounding!(ROUNDING);
40251 let a: Simd = a.as_f32x4();
40252 let b: Simd = b.as_f32x4();
40253 let r: Simd = vmulss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
40254 transmute(src:r)
40255 }
40256}
40257
40258/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40259///
40260/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40261/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40262/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40263/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40264/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40265/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40266///
40267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
40268#[inline]
40269#[target_feature(enable = "avx512f")]
40270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40271#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40272#[rustc_legacy_const_generics(2)]
40273pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40274 unsafe {
40275 static_assert_rounding!(ROUNDING);
40276 let a: Simd = a.as_f64x2();
40277 let b: Simd = b.as_f64x2();
40278 let r: Simd = vmulsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
40279 transmute(src:r)
40280 }
40281}
40282
40283/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40284///
40285/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40286/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40287/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40288/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40289/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40290/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40291///
40292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
40293#[inline]
40294#[target_feature(enable = "avx512f")]
40295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40296#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40297#[rustc_legacy_const_generics(4)]
40298pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
40299 src: __m128d,
40300 k: __mmask8,
40301 a: __m128d,
40302 b: __m128d,
40303) -> __m128d {
40304 unsafe {
40305 static_assert_rounding!(ROUNDING);
40306 let a: Simd = a.as_f64x2();
40307 let b: Simd = b.as_f64x2();
40308 let src: Simd = src.as_f64x2();
40309 let r: Simd = vmulsd(a, b, src, mask:k, ROUNDING);
40310 transmute(src:r)
40311 }
40312}
40313
40314/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40315///
40316/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40317/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40318/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40319/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40320/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40321/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40322///
40323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
40324#[inline]
40325#[target_feature(enable = "avx512f")]
40326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40327#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40328#[rustc_legacy_const_generics(3)]
40329pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40330 unsafe {
40331 static_assert_rounding!(ROUNDING);
40332 let a: Simd = a.as_f64x2();
40333 let b: Simd = b.as_f64x2();
40334 let r: Simd = vmulsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40335 transmute(src:r)
40336 }
40337}
40338
40339/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40340///
40341/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40342/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40343/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40344/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40345/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40346/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40347///
40348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
40349#[inline]
40350#[target_feature(enable = "avx512f")]
40351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40352#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40353#[rustc_legacy_const_generics(2)]
40354pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40355 unsafe {
40356 static_assert_rounding!(ROUNDING);
40357 let a: Simd = a.as_f32x4();
40358 let b: Simd = b.as_f32x4();
40359 let r: Simd = vdivss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
40360 transmute(src:r)
40361 }
40362}
40363
40364/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40365///
40366/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40367/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40368/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40369/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40370/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40371/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40372///
40373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
40374#[inline]
40375#[target_feature(enable = "avx512f")]
40376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40377#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40378#[rustc_legacy_const_generics(4)]
40379pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
40380 src: __m128,
40381 k: __mmask8,
40382 a: __m128,
40383 b: __m128,
40384) -> __m128 {
40385 unsafe {
40386 static_assert_rounding!(ROUNDING);
40387 let a: Simd = a.as_f32x4();
40388 let b: Simd = b.as_f32x4();
40389 let src: Simd = src.as_f32x4();
40390 let r: Simd = vdivss(a, b, src, mask:k, ROUNDING);
40391 transmute(src:r)
40392 }
40393}
40394
40395/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40396///
40397/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40398/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40399/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40400/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40401/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40403///
40404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
40405#[inline]
40406#[target_feature(enable = "avx512f")]
40407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40408#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40409#[rustc_legacy_const_generics(3)]
40410pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40411 unsafe {
40412 static_assert_rounding!(ROUNDING);
40413 let a: Simd = a.as_f32x4();
40414 let b: Simd = b.as_f32x4();
40415 let r: Simd = vdivss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
40416 transmute(src:r)
40417 }
40418}
40419
40420/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40421///
40422/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40423/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40424/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40425/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40426/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40427/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40428///
40429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
40430#[inline]
40431#[target_feature(enable = "avx512f")]
40432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40433#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40434#[rustc_legacy_const_generics(2)]
40435pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40436 unsafe {
40437 static_assert_rounding!(ROUNDING);
40438 let a: Simd = a.as_f64x2();
40439 let b: Simd = b.as_f64x2();
40440 let r: Simd = vdivsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
40441 transmute(src:r)
40442 }
40443}
40444
40445/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40446///
40447/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40452/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40453///
40454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
40455#[inline]
40456#[target_feature(enable = "avx512f")]
40457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40458#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40459#[rustc_legacy_const_generics(4)]
40460pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
40461 src: __m128d,
40462 k: __mmask8,
40463 a: __m128d,
40464 b: __m128d,
40465) -> __m128d {
40466 unsafe {
40467 static_assert_rounding!(ROUNDING);
40468 let a: Simd = a.as_f64x2();
40469 let b: Simd = b.as_f64x2();
40470 let src: Simd = src.as_f64x2();
40471 let r: Simd = vdivsd(a, b, src, mask:k, ROUNDING);
40472 transmute(src:r)
40473 }
40474}
40475
40476/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40477///
40478/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40479/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40480/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40481/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40482/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40483/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40484///
40485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
40486#[inline]
40487#[target_feature(enable = "avx512f")]
40488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40489#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40490#[rustc_legacy_const_generics(3)]
40491pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40492 unsafe {
40493 static_assert_rounding!(ROUNDING);
40494 let a: Simd = a.as_f64x2();
40495 let b: Simd = b.as_f64x2();
40496 let r: Simd = vdivsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40497 transmute(src:r)
40498 }
40499}
40500
40501/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40502/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40503///
40504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
40505#[inline]
40506#[target_feature(enable = "avx512f")]
40507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40508#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40509#[rustc_legacy_const_generics(2)]
40510pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40511 unsafe {
40512 static_assert_sae!(SAE);
40513 let a: Simd = a.as_f32x4();
40514 let b: Simd = b.as_f32x4();
40515 let r: Simd = vmaxss(a, b, src:f32x4::ZERO, mask:0b1, SAE);
40516 transmute(src:r)
40517 }
40518}
40519
40520/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40521/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40522///
40523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
40524#[inline]
40525#[target_feature(enable = "avx512f")]
40526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40527#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40528#[rustc_legacy_const_generics(4)]
40529pub fn _mm_mask_max_round_ss<const SAE: i32>(
40530 src: __m128,
40531 k: __mmask8,
40532 a: __m128,
40533 b: __m128,
40534) -> __m128 {
40535 unsafe {
40536 static_assert_sae!(SAE);
40537 let a: Simd = a.as_f32x4();
40538 let b: Simd = b.as_f32x4();
40539 let src: Simd = src.as_f32x4();
40540 let r: Simd = vmaxss(a, b, src, mask:k, SAE);
40541 transmute(src:r)
40542 }
40543}
40544
40545/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40546/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40547///
40548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
40549#[inline]
40550#[target_feature(enable = "avx512f")]
40551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40552#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40553#[rustc_legacy_const_generics(3)]
40554pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40555 unsafe {
40556 static_assert_sae!(SAE);
40557 let a: Simd = a.as_f32x4();
40558 let b: Simd = b.as_f32x4();
40559 let r: Simd = vmaxss(a, b, src:f32x4::ZERO, mask:k, SAE);
40560 transmute(src:r)
40561 }
40562}
40563
40564/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40566///
40567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
40568#[inline]
40569#[target_feature(enable = "avx512f")]
40570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40571#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40572#[rustc_legacy_const_generics(2)]
40573pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40574 unsafe {
40575 static_assert_sae!(SAE);
40576 let a: Simd = a.as_f64x2();
40577 let b: Simd = b.as_f64x2();
40578 let r: Simd = vmaxsd(a, b, src:f64x2::ZERO, mask:0b1, SAE);
40579 transmute(src:r)
40580 }
40581}
40582
40583/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40584/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40585///
40586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
40587#[inline]
40588#[target_feature(enable = "avx512f")]
40589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40590#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40591#[rustc_legacy_const_generics(4)]
40592pub fn _mm_mask_max_round_sd<const SAE: i32>(
40593 src: __m128d,
40594 k: __mmask8,
40595 a: __m128d,
40596 b: __m128d,
40597) -> __m128d {
40598 unsafe {
40599 static_assert_sae!(SAE);
40600 let a: Simd = a.as_f64x2();
40601 let b: Simd = b.as_f64x2();
40602 let src: Simd = src.as_f64x2();
40603 let r: Simd = vmaxsd(a, b, src, mask:k, SAE);
40604 transmute(src:r)
40605 }
40606}
40607
40608/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40609/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40610///
40611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
40612#[inline]
40613#[target_feature(enable = "avx512f")]
40614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40615#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40616#[rustc_legacy_const_generics(3)]
40617pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40618 unsafe {
40619 static_assert_sae!(SAE);
40620 let a: Simd = a.as_f64x2();
40621 let b: Simd = b.as_f64x2();
40622 let r: Simd = vmaxsd(a, b, src:f64x2::ZERO, mask:k, SAE);
40623 transmute(src:r)
40624 }
40625}
40626
40627/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40628/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40629///
40630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
40631#[inline]
40632#[target_feature(enable = "avx512f")]
40633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40634#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40635#[rustc_legacy_const_generics(2)]
40636pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40637 unsafe {
40638 static_assert_sae!(SAE);
40639 let a: Simd = a.as_f32x4();
40640 let b: Simd = b.as_f32x4();
40641 let r: Simd = vminss(a, b, src:f32x4::ZERO, mask:0b1, SAE);
40642 transmute(src:r)
40643 }
40644}
40645
40646/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40647/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40648///
40649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
40650#[inline]
40651#[target_feature(enable = "avx512f")]
40652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40653#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40654#[rustc_legacy_const_generics(4)]
40655pub fn _mm_mask_min_round_ss<const SAE: i32>(
40656 src: __m128,
40657 k: __mmask8,
40658 a: __m128,
40659 b: __m128,
40660) -> __m128 {
40661 unsafe {
40662 static_assert_sae!(SAE);
40663 let a: Simd = a.as_f32x4();
40664 let b: Simd = b.as_f32x4();
40665 let src: Simd = src.as_f32x4();
40666 let r: Simd = vminss(a, b, src, mask:k, SAE);
40667 transmute(src:r)
40668 }
40669}
40670
40671/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40673///
40674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
40675#[inline]
40676#[target_feature(enable = "avx512f")]
40677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40678#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40679#[rustc_legacy_const_generics(3)]
40680pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40681 unsafe {
40682 static_assert_sae!(SAE);
40683 let a: Simd = a.as_f32x4();
40684 let b: Simd = b.as_f32x4();
40685 let r: Simd = vminss(a, b, src:f32x4::ZERO, mask:k, SAE);
40686 transmute(src:r)
40687 }
40688}
40689
40690/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst , and copy the upper element from a to the upper element of dst.\
40691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40692///
40693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
40694#[inline]
40695#[target_feature(enable = "avx512f")]
40696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40697#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40698#[rustc_legacy_const_generics(2)]
40699pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40700 unsafe {
40701 static_assert_sae!(SAE);
40702 let a: Simd = a.as_f64x2();
40703 let b: Simd = b.as_f64x2();
40704 let r: Simd = vminsd(a, b, src:f64x2::ZERO, mask:0b1, SAE);
40705 transmute(src:r)
40706 }
40707}
40708
40709/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40710/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40711///
40712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
40713#[inline]
40714#[target_feature(enable = "avx512f")]
40715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40716#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40717#[rustc_legacy_const_generics(4)]
40718pub fn _mm_mask_min_round_sd<const SAE: i32>(
40719 src: __m128d,
40720 k: __mmask8,
40721 a: __m128d,
40722 b: __m128d,
40723) -> __m128d {
40724 unsafe {
40725 static_assert_sae!(SAE);
40726 let a: Simd = a.as_f64x2();
40727 let b: Simd = b.as_f64x2();
40728 let src: Simd = src.as_f64x2();
40729 let r: Simd = vminsd(a, b, src, mask:k, SAE);
40730 transmute(src:r)
40731 }
40732}
40733
40734/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40735/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40736///
40737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
40738#[inline]
40739#[target_feature(enable = "avx512f")]
40740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40741#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40742#[rustc_legacy_const_generics(3)]
40743pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40744 unsafe {
40745 static_assert_sae!(SAE);
40746 let a: Simd = a.as_f64x2();
40747 let b: Simd = b.as_f64x2();
40748 let r: Simd = vminsd(a, b, src:f64x2::ZERO, mask:k, SAE);
40749 transmute(src:r)
40750 }
40751}
40752
40753/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40754///
40755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40761///
40762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
40763#[inline]
40764#[target_feature(enable = "avx512f")]
40765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40766#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40767#[rustc_legacy_const_generics(2)]
40768pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40769 unsafe {
40770 static_assert_rounding!(ROUNDING);
40771 vsqrtss(a, b, src:_mm_setzero_ps(), mask:0b1, ROUNDING)
40772 }
40773}
40774
40775/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40776///
40777/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40778/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40779/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40780/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40781/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40782/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40783///
40784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
40785#[inline]
40786#[target_feature(enable = "avx512f")]
40787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40788#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40789#[rustc_legacy_const_generics(4)]
40790pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
40791 src: __m128,
40792 k: __mmask8,
40793 a: __m128,
40794 b: __m128,
40795) -> __m128 {
40796 unsafe {
40797 static_assert_rounding!(ROUNDING);
40798 vsqrtss(a, b, src, mask:k, ROUNDING)
40799 }
40800}
40801
40802/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40803///
40804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40810///
40811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
40812#[inline]
40813#[target_feature(enable = "avx512f")]
40814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40815#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40816#[rustc_legacy_const_generics(3)]
40817pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40818 unsafe {
40819 static_assert_rounding!(ROUNDING);
40820 vsqrtss(a, b, src:_mm_setzero_ps(), mask:k, ROUNDING)
40821 }
40822}
40823
40824/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40825///
40826/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40827/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40828/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40829/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40830/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40831/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40832///
40833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
40834#[inline]
40835#[target_feature(enable = "avx512f")]
40836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40837#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40838#[rustc_legacy_const_generics(2)]
40839pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40840 unsafe {
40841 static_assert_rounding!(ROUNDING);
40842 vsqrtsd(a, b, src:_mm_setzero_pd(), mask:0b1, ROUNDING)
40843 }
40844}
40845
40846/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40847///
40848/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40849/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40850/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40851/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40852/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40853/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40854///
40855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
40856#[inline]
40857#[target_feature(enable = "avx512f")]
40858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40859#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40860#[rustc_legacy_const_generics(4)]
40861pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
40862 src: __m128d,
40863 k: __mmask8,
40864 a: __m128d,
40865 b: __m128d,
40866) -> __m128d {
40867 unsafe {
40868 static_assert_rounding!(ROUNDING);
40869 vsqrtsd(a, b, src, mask:k, ROUNDING)
40870 }
40871}
40872
40873/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40874///
40875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40881///
40882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
40883#[inline]
40884#[target_feature(enable = "avx512f")]
40885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40886#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40887#[rustc_legacy_const_generics(3)]
40888pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
40889 k: __mmask8,
40890 a: __m128d,
40891 b: __m128d,
40892) -> __m128d {
40893 unsafe {
40894 static_assert_rounding!(ROUNDING);
40895 vsqrtsd(a, b, src:_mm_setzero_pd(), mask:k, ROUNDING)
40896 }
40897}
40898
40899/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40900/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40901///
40902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
40903#[inline]
40904#[target_feature(enable = "avx512f")]
40905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40906#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40907#[rustc_legacy_const_generics(2)]
40908pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40909 unsafe {
40910 static_assert_sae!(SAE);
40911 let a: Simd = a.as_f32x4();
40912 let b: Simd = b.as_f32x4();
40913 let r: Simd = vgetexpss(a, b, src:f32x4::ZERO, mask:0b1, SAE);
40914 transmute(src:r)
40915 }
40916}
40917
40918/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40925#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40926#[rustc_legacy_const_generics(4)]
40927pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
40928 src: __m128,
40929 k: __mmask8,
40930 a: __m128,
40931 b: __m128,
40932) -> __m128 {
40933 unsafe {
40934 static_assert_sae!(SAE);
40935 let a: Simd = a.as_f32x4();
40936 let b: Simd = b.as_f32x4();
40937 let src: Simd = src.as_f32x4();
40938 let r: Simd = vgetexpss(a, b, src, mask:k, SAE);
40939 transmute(src:r)
40940 }
40941}
40942
40943/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40944/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40945///
40946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
40947#[inline]
40948#[target_feature(enable = "avx512f")]
40949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40950#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40951#[rustc_legacy_const_generics(3)]
40952pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40953 unsafe {
40954 static_assert_sae!(SAE);
40955 let a: Simd = a.as_f32x4();
40956 let b: Simd = b.as_f32x4();
40957 let r: Simd = vgetexpss(a, b, src:f32x4::ZERO, mask:k, SAE);
40958 transmute(src:r)
40959 }
40960}
40961
40962/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40963/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40964///
40965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
40966#[inline]
40967#[target_feature(enable = "avx512f")]
40968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40969#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40970#[rustc_legacy_const_generics(2)]
40971pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40972 unsafe {
40973 static_assert_sae!(SAE);
40974 let a: Simd = a.as_f64x2();
40975 let b: Simd = b.as_f64x2();
40976 let r: Simd = vgetexpsd(a, b, src:f64x2::ZERO, mask:0b1, SAE);
40977 transmute(src:r)
40978 }
40979}
40980
40981/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40982/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40983///
40984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
40985#[inline]
40986#[target_feature(enable = "avx512f")]
40987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40988#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40989#[rustc_legacy_const_generics(4)]
40990pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
40991 src: __m128d,
40992 k: __mmask8,
40993 a: __m128d,
40994 b: __m128d,
40995) -> __m128d {
40996 unsafe {
40997 static_assert_sae!(SAE);
40998 let a: Simd = a.as_f64x2();
40999 let b: Simd = b.as_f64x2();
41000 let src: Simd = src.as_f64x2();
41001 let r: Simd = vgetexpsd(a, b, src, mask:k, SAE);
41002 transmute(src:r)
41003 }
41004}
41005
41006/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
41007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41008///
41009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
41010#[inline]
41011#[target_feature(enable = "avx512f")]
41012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41013#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
41014#[rustc_legacy_const_generics(3)]
41015pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
41016 unsafe {
41017 static_assert_sae!(SAE);
41018 let a: Simd = a.as_f64x2();
41019 let b: Simd = b.as_f64x2();
41020 let r: Simd = vgetexpsd(a, b, src:f64x2::ZERO, mask:k, SAE);
41021 transmute(src:r)
41022 }
41023}
41024
41025/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41026/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41027/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41028/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41029/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41030/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41031/// The sign is determined by sc which can take the following values:\
41032/// _MM_MANT_SIGN_src // sign = sign(src)\
41033/// _MM_MANT_SIGN_zero // sign = 0\
41034/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41035/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41036///
41037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
41038#[inline]
41039#[target_feature(enable = "avx512f")]
41040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41041#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41042#[rustc_legacy_const_generics(2, 3, 4)]
41043pub fn _mm_getmant_round_ss<
41044 const NORM: _MM_MANTISSA_NORM_ENUM,
41045 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41046 const SAE: i32,
41047>(
41048 a: __m128,
41049 b: __m128,
41050) -> __m128 {
41051 unsafe {
41052 static_assert_uimm_bits!(NORM, 4);
41053 static_assert_uimm_bits!(SIGN, 2);
41054 static_assert_mantissas_sae!(SAE);
41055 let a: Simd = a.as_f32x4();
41056 let b: Simd = b.as_f32x4();
41057 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src:f32x4::ZERO, m:0b1, SAE);
41058 transmute(src:r)
41059 }
41060}
41061
41062/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41063/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41064/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41065/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41066/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41067/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41068/// The sign is determined by sc which can take the following values:\
41069/// _MM_MANT_SIGN_src // sign = sign(src)\
41070/// _MM_MANT_SIGN_zero // sign = 0\
41071/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41072/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41073///
41074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
41075#[inline]
41076#[target_feature(enable = "avx512f")]
41077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41078#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41079#[rustc_legacy_const_generics(4, 5, 6)]
41080pub fn _mm_mask_getmant_round_ss<
41081 const NORM: _MM_MANTISSA_NORM_ENUM,
41082 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41083 const SAE: i32,
41084>(
41085 src: __m128,
41086 k: __mmask8,
41087 a: __m128,
41088 b: __m128,
41089) -> __m128 {
41090 unsafe {
41091 static_assert_uimm_bits!(NORM, 4);
41092 static_assert_uimm_bits!(SIGN, 2);
41093 static_assert_mantissas_sae!(SAE);
41094 let a: Simd = a.as_f32x4();
41095 let b: Simd = b.as_f32x4();
41096 let src: Simd = src.as_f32x4();
41097 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src, m:k, SAE);
41098 transmute(src:r)
41099 }
41100}
41101
41102/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41103/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41104/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41105/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41106/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41107/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41108/// The sign is determined by sc which can take the following values:\
41109/// _MM_MANT_SIGN_src // sign = sign(src)\
41110/// _MM_MANT_SIGN_zero // sign = 0\
41111/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41112/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41113///
41114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
41115#[inline]
41116#[target_feature(enable = "avx512f")]
41117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41118#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41119#[rustc_legacy_const_generics(3, 4, 5)]
41120pub fn _mm_maskz_getmant_round_ss<
41121 const NORM: _MM_MANTISSA_NORM_ENUM,
41122 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41123 const SAE: i32,
41124>(
41125 k: __mmask8,
41126 a: __m128,
41127 b: __m128,
41128) -> __m128 {
41129 unsafe {
41130 static_assert_uimm_bits!(NORM, 4);
41131 static_assert_uimm_bits!(SIGN, 2);
41132 static_assert_mantissas_sae!(SAE);
41133 let a: Simd = a.as_f32x4();
41134 let b: Simd = b.as_f32x4();
41135 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src:f32x4::ZERO, m:k, SAE);
41136 transmute(src:r)
41137 }
41138}
41139
41140/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41141/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41142/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41143/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41144/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41145/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41146/// The sign is determined by sc which can take the following values:\
41147/// _MM_MANT_SIGN_src // sign = sign(src)\
41148/// _MM_MANT_SIGN_zero // sign = 0\
41149/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41151///
41152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
41153#[inline]
41154#[target_feature(enable = "avx512f")]
41155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41156#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41157#[rustc_legacy_const_generics(2, 3, 4)]
41158pub fn _mm_getmant_round_sd<
41159 const NORM: _MM_MANTISSA_NORM_ENUM,
41160 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41161 const SAE: i32,
41162>(
41163 a: __m128d,
41164 b: __m128d,
41165) -> __m128d {
41166 unsafe {
41167 static_assert_uimm_bits!(NORM, 4);
41168 static_assert_uimm_bits!(SIGN, 2);
41169 static_assert_mantissas_sae!(SAE);
41170 let a: Simd = a.as_f64x2();
41171 let b: Simd = b.as_f64x2();
41172 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src:f64x2::ZERO, m:0b1, SAE);
41173 transmute(src:r)
41174 }
41175}
41176
41177/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41178/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41179/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41180/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41181/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41182/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41183/// The sign is determined by sc which can take the following values:\
41184/// _MM_MANT_SIGN_src // sign = sign(src)\
41185/// _MM_MANT_SIGN_zero // sign = 0\
41186/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41187/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41188///
41189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
41190#[inline]
41191#[target_feature(enable = "avx512f")]
41192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41193#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41194#[rustc_legacy_const_generics(4, 5, 6)]
41195pub fn _mm_mask_getmant_round_sd<
41196 const NORM: _MM_MANTISSA_NORM_ENUM,
41197 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41198 const SAE: i32,
41199>(
41200 src: __m128d,
41201 k: __mmask8,
41202 a: __m128d,
41203 b: __m128d,
41204) -> __m128d {
41205 unsafe {
41206 static_assert_uimm_bits!(NORM, 4);
41207 static_assert_uimm_bits!(SIGN, 2);
41208 static_assert_mantissas_sae!(SAE);
41209 let a: Simd = a.as_f64x2();
41210 let b: Simd = b.as_f64x2();
41211 let src: Simd = src.as_f64x2();
41212 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src, m:k, SAE);
41213 transmute(src:r)
41214 }
41215}
41216
41217/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41218/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41219/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41220/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41221/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41222/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41223/// The sign is determined by sc which can take the following values:\
41224/// _MM_MANT_SIGN_src // sign = sign(src)\
41225/// _MM_MANT_SIGN_zero // sign = 0\
41226/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41227/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41228///
41229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
41230#[inline]
41231#[target_feature(enable = "avx512f")]
41232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41233#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41234#[rustc_legacy_const_generics(3, 4, 5)]
41235pub fn _mm_maskz_getmant_round_sd<
41236 const NORM: _MM_MANTISSA_NORM_ENUM,
41237 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41238 const SAE: i32,
41239>(
41240 k: __mmask8,
41241 a: __m128d,
41242 b: __m128d,
41243) -> __m128d {
41244 unsafe {
41245 static_assert_uimm_bits!(NORM, 4);
41246 static_assert_uimm_bits!(SIGN, 2);
41247 static_assert_mantissas_sae!(SAE);
41248 let a: Simd = a.as_f64x2();
41249 let b: Simd = b.as_f64x2();
41250 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src:f64x2::ZERO, m:k, SAE);
41251 transmute(src:r)
41252 }
41253}
41254
41255/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41256/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41257/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41258/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41259/// * [`_MM_FROUND_TO_POS_INF`] : round up
41260/// * [`_MM_FROUND_TO_ZERO`] : truncate
41261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41262///
41263/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
41265#[inline]
41266#[target_feature(enable = "avx512f")]
41267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41268#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41269#[rustc_legacy_const_generics(2, 3)]
41270pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
41271 unsafe {
41272 static_assert_uimm_bits!(IMM8, 8);
41273 static_assert_mantissas_sae!(SAE);
41274 let a: Simd = a.as_f32x4();
41275 let b: Simd = b.as_f32x4();
41276 let r: Simd = vrndscaless(a, b, src:f32x4::ZERO, mask:0b11111111, IMM8, SAE);
41277 transmute(src:r)
41278 }
41279}
41280
41281/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41282/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41283/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41284/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41285/// * [`_MM_FROUND_TO_POS_INF`] : round up
41286/// * [`_MM_FROUND_TO_ZERO`] : truncate
41287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41288///
41289/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
41291#[inline]
41292#[target_feature(enable = "avx512f")]
41293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41294#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41295#[rustc_legacy_const_generics(4, 5)]
41296pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41297 src: __m128,
41298 k: __mmask8,
41299 a: __m128,
41300 b: __m128,
41301) -> __m128 {
41302 unsafe {
41303 static_assert_uimm_bits!(IMM8, 8);
41304 static_assert_mantissas_sae!(SAE);
41305 let a: Simd = a.as_f32x4();
41306 let b: Simd = b.as_f32x4();
41307 let src: Simd = src.as_f32x4();
41308 let r: Simd = vrndscaless(a, b, src, mask:k, IMM8, SAE);
41309 transmute(src:r)
41310 }
41311}
41312
41313/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41314/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41315/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41316/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41317/// * [`_MM_FROUND_TO_POS_INF`] : round up
41318/// * [`_MM_FROUND_TO_ZERO`] : truncate
41319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41320///
41321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
41323#[inline]
41324#[target_feature(enable = "avx512f")]
41325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41326#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41327#[rustc_legacy_const_generics(3, 4)]
41328pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41329 k: __mmask8,
41330 a: __m128,
41331 b: __m128,
41332) -> __m128 {
41333 unsafe {
41334 static_assert_uimm_bits!(IMM8, 8);
41335 static_assert_mantissas_sae!(SAE);
41336 let a: Simd = a.as_f32x4();
41337 let b: Simd = b.as_f32x4();
41338 let r: Simd = vrndscaless(a, b, src:f32x4::ZERO, mask:k, IMM8, SAE);
41339 transmute(src:r)
41340 }
41341}
41342
41343/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41344/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41345/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41346/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41347/// * [`_MM_FROUND_TO_POS_INF`] : round up
41348/// * [`_MM_FROUND_TO_ZERO`] : truncate
41349/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41350///
41351/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
41353#[inline]
41354#[target_feature(enable = "avx512f")]
41355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41356#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41357#[rustc_legacy_const_generics(2, 3)]
41358pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
41359 unsafe {
41360 static_assert_uimm_bits!(IMM8, 8);
41361 static_assert_mantissas_sae!(SAE);
41362 let a: Simd = a.as_f64x2();
41363 let b: Simd = b.as_f64x2();
41364 let r: Simd = vrndscalesd(a, b, src:f64x2::ZERO, mask:0b11111111, IMM8, SAE);
41365 transmute(src:r)
41366 }
41367}
41368
41369/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41370/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41371/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41372/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41373/// * [`_MM_FROUND_TO_POS_INF`] : round up
41374/// * [`_MM_FROUND_TO_ZERO`] : truncate
41375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41376///
41377/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
41379#[inline]
41380#[target_feature(enable = "avx512f")]
41381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41382#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41383#[rustc_legacy_const_generics(4, 5)]
41384pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41385 src: __m128d,
41386 k: __mmask8,
41387 a: __m128d,
41388 b: __m128d,
41389) -> __m128d {
41390 unsafe {
41391 static_assert_uimm_bits!(IMM8, 8);
41392 static_assert_mantissas_sae!(SAE);
41393 let a: Simd = a.as_f64x2();
41394 let b: Simd = b.as_f64x2();
41395 let src: Simd = src.as_f64x2();
41396 let r: Simd = vrndscalesd(a, b, src, mask:k, IMM8, SAE);
41397 transmute(src:r)
41398 }
41399}
41400
41401/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41402/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41403/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41404/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41405/// * [`_MM_FROUND_TO_POS_INF`] : round up
41406/// * [`_MM_FROUND_TO_ZERO`] : truncate
41407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41408///
41409/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
41411#[inline]
41412#[target_feature(enable = "avx512f")]
41413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41414#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41415#[rustc_legacy_const_generics(3, 4)]
41416pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41417 k: __mmask8,
41418 a: __m128d,
41419 b: __m128d,
41420) -> __m128d {
41421 unsafe {
41422 static_assert_uimm_bits!(IMM8, 8);
41423 static_assert_mantissas_sae!(SAE);
41424 let a: Simd = a.as_f64x2();
41425 let b: Simd = b.as_f64x2();
41426 let r: Simd = vrndscalesd(a, b, src:f64x2::ZERO, mask:k, IMM8, SAE);
41427 transmute(src:r)
41428 }
41429}
41430
41431/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41432///
41433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41439///
41440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
41441#[inline]
41442#[target_feature(enable = "avx512f")]
41443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41444#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41445#[rustc_legacy_const_generics(2)]
41446pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
41447 unsafe {
41448 static_assert_rounding!(ROUNDING);
41449 let a: Simd = a.as_f32x4();
41450 let b: Simd = b.as_f32x4();
41451 let r: Simd = vscalefss(a, b, src:f32x4::ZERO, mask:0b11111111, ROUNDING);
41452 transmute(src:r)
41453 }
41454}
41455
41456/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41457///
41458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41464///
41465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
41466#[inline]
41467#[target_feature(enable = "avx512f")]
41468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41469#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41470#[rustc_legacy_const_generics(4)]
41471pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
41472 src: __m128,
41473 k: __mmask8,
41474 a: __m128,
41475 b: __m128,
41476) -> __m128 {
41477 unsafe {
41478 static_assert_rounding!(ROUNDING);
41479 let a: Simd = a.as_f32x4();
41480 let b: Simd = b.as_f32x4();
41481 let src: Simd = src.as_f32x4();
41482 let r: Simd = vscalefss(a, b, src, mask:k, ROUNDING);
41483 transmute(src:r)
41484 }
41485}
41486
41487/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41488///
41489/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41490/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41491/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41492/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41493/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41494/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41495///
41496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
41497#[inline]
41498#[target_feature(enable = "avx512f")]
41499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41500#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41501#[rustc_legacy_const_generics(3)]
41502pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
41503 unsafe {
41504 static_assert_rounding!(ROUNDING);
41505 let a: Simd = a.as_f32x4();
41506 let b: Simd = b.as_f32x4();
41507 let r: Simd = vscalefss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
41508 transmute(src:r)
41509 }
41510}
41511
41512/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41513///
41514/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41515/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41516/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41517/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41518/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41519/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41520///
41521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
41522#[inline]
41523#[target_feature(enable = "avx512f")]
41524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41525#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41526#[rustc_legacy_const_generics(2)]
41527pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
41528 unsafe {
41529 static_assert_rounding!(ROUNDING);
41530 let a: Simd = a.as_f64x2();
41531 let b: Simd = b.as_f64x2();
41532 let r: Simd = vscalefsd(a, b, src:f64x2::ZERO, mask:0b11111111, ROUNDING);
41533 transmute(src:r)
41534 }
41535}
41536
41537/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41538///
41539/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41545///
41546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
41547#[inline]
41548#[target_feature(enable = "avx512f")]
41549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41550#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41551#[rustc_legacy_const_generics(4)]
41552pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
41553 src: __m128d,
41554 k: __mmask8,
41555 a: __m128d,
41556 b: __m128d,
41557) -> __m128d {
41558 unsafe {
41559 let a: Simd = a.as_f64x2();
41560 let b: Simd = b.as_f64x2();
41561 let src: Simd = src.as_f64x2();
41562 let r: Simd = vscalefsd(a, b, src, mask:k, ROUNDING);
41563 transmute(src:r)
41564 }
41565}
41566
41567/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41568///
41569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41575///
41576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
41577#[inline]
41578#[target_feature(enable = "avx512f")]
41579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41580#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41581#[rustc_legacy_const_generics(3)]
41582pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
41583 k: __mmask8,
41584 a: __m128d,
41585 b: __m128d,
41586) -> __m128d {
41587 unsafe {
41588 static_assert_rounding!(ROUNDING);
41589 let a: Simd = a.as_f64x2();
41590 let b: Simd = b.as_f64x2();
41591 let r: Simd = vscalefsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
41592 transmute(src:r)
41593 }
41594}
41595
41596/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41597///
41598/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41599/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41600/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41601/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41602/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41603/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41604///
41605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
41606#[inline]
41607#[target_feature(enable = "avx512f")]
41608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41609#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41610#[rustc_legacy_const_generics(3)]
41611pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
41612 unsafe {
41613 static_assert_rounding!(ROUNDING);
41614 let extracta: f32 = simd_extract!(a, 0);
41615 let extractb: f32 = simd_extract!(b, 0);
41616 let extractc: f32 = simd_extract!(c, 0);
41617 let r: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41618 simd_insert!(a, 0, r)
41619 }
41620}
41621
41622/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41623///
41624/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41625/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41626/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41627/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41628/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41630///
41631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
41632#[inline]
41633#[target_feature(enable = "avx512f")]
41634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41635#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41636#[rustc_legacy_const_generics(4)]
41637pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
41638 a: __m128,
41639 k: __mmask8,
41640 b: __m128,
41641 c: __m128,
41642) -> __m128 {
41643 unsafe {
41644 static_assert_rounding!(ROUNDING);
41645 let mut fmadd: f32 = simd_extract!(a, 0);
41646 if (k & 0b00000001) != 0 {
41647 let extractb: f32 = simd_extract!(b, 0);
41648 let extractc: f32 = simd_extract!(c, 0);
41649 fmadd = vfmaddssround(a:fmadd, b:extractb, c:extractc, ROUNDING);
41650 }
41651 simd_insert!(a, 0, fmadd)
41652 }
41653}
41654
41655/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41656///
41657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41663///
41664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
41665#[inline]
41666#[target_feature(enable = "avx512f")]
41667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41668#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41669#[rustc_legacy_const_generics(4)]
41670pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
41671 k: __mmask8,
41672 a: __m128,
41673 b: __m128,
41674 c: __m128,
41675) -> __m128 {
41676 unsafe {
41677 static_assert_rounding!(ROUNDING);
41678 let mut fmadd: f32 = 0.;
41679 if (k & 0b00000001) != 0 {
41680 let extracta: f32 = simd_extract!(a, 0);
41681 let extractb: f32 = simd_extract!(b, 0);
41682 let extractc: f32 = simd_extract!(c, 0);
41683 fmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41684 }
41685 simd_insert!(a, 0, fmadd)
41686 }
41687}
41688
41689/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
41690///
41691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41697///
41698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
41699#[inline]
41700#[target_feature(enable = "avx512f")]
41701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41702#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41703#[rustc_legacy_const_generics(4)]
41704pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
41705 a: __m128,
41706 b: __m128,
41707 c: __m128,
41708 k: __mmask8,
41709) -> __m128 {
41710 unsafe {
41711 static_assert_rounding!(ROUNDING);
41712 let mut fmadd: f32 = simd_extract!(c, 0);
41713 if (k & 0b00000001) != 0 {
41714 let extracta: f32 = simd_extract!(a, 0);
41715 let extractb: f32 = simd_extract!(b, 0);
41716 fmadd = vfmaddssround(a:extracta, b:extractb, c:fmadd, ROUNDING);
41717 }
41718 simd_insert!(c, 0, fmadd)
41719 }
41720}
41721
41722/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41723///
41724/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41725/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41726/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41727/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41728/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41729/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41730///
41731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
41732#[inline]
41733#[target_feature(enable = "avx512f")]
41734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41735#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41736#[rustc_legacy_const_generics(3)]
41737pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
41738 unsafe {
41739 static_assert_rounding!(ROUNDING);
41740 let extracta: f64 = simd_extract!(a, 0);
41741 let extractb: f64 = simd_extract!(b, 0);
41742 let extractc: f64 = simd_extract!(c, 0);
41743 let fmadd: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
41744 simd_insert!(a, 0, fmadd)
41745 }
41746}
41747
41748/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41749///
41750/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41751/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41752/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41753/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41754/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41755/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41756///
41757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
41758#[inline]
41759#[target_feature(enable = "avx512f")]
41760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41761#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41762#[rustc_legacy_const_generics(4)]
41763pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
41764 a: __m128d,
41765 k: __mmask8,
41766 b: __m128d,
41767 c: __m128d,
41768) -> __m128d {
41769 unsafe {
41770 static_assert_rounding!(ROUNDING);
41771 let mut fmadd: f64 = simd_extract!(a, 0);
41772 if (k & 0b00000001) != 0 {
41773 let extractb: f64 = simd_extract!(b, 0);
41774 let extractc: f64 = simd_extract!(c, 0);
41775 fmadd = vfmaddsdround(a:fmadd, b:extractb, c:extractc, ROUNDING);
41776 }
41777 simd_insert!(a, 0, fmadd)
41778 }
41779}
41780
41781/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41782///
41783/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41784/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41785/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41786/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41787/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41788/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41789///
41790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
41791#[inline]
41792#[target_feature(enable = "avx512f")]
41793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41794#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41795#[rustc_legacy_const_generics(4)]
41796pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
41797 k: __mmask8,
41798 a: __m128d,
41799 b: __m128d,
41800 c: __m128d,
41801) -> __m128d {
41802 unsafe {
41803 static_assert_rounding!(ROUNDING);
41804 let mut fmadd: f64 = 0.;
41805 if (k & 0b00000001) != 0 {
41806 let extracta: f64 = simd_extract!(a, 0);
41807 let extractb: f64 = simd_extract!(b, 0);
41808 let extractc: f64 = simd_extract!(c, 0);
41809 fmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
41810 }
41811 simd_insert!(a, 0, fmadd)
41812 }
41813}
41814
41815/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
41816///
41817/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41818/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41819/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41820/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41821/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41822/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41823///
41824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
41825#[inline]
41826#[target_feature(enable = "avx512f")]
41827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41828#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41829#[rustc_legacy_const_generics(4)]
41830pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
41831 a: __m128d,
41832 b: __m128d,
41833 c: __m128d,
41834 k: __mmask8,
41835) -> __m128d {
41836 unsafe {
41837 static_assert_rounding!(ROUNDING);
41838 let mut fmadd: f64 = simd_extract!(c, 0);
41839 if (k & 0b00000001) != 0 {
41840 let extracta: f64 = simd_extract!(a, 0);
41841 let extractb: f64 = simd_extract!(b, 0);
41842 fmadd = vfmaddsdround(a:extracta, b:extractb, c:fmadd, ROUNDING);
41843 }
41844 simd_insert!(c, 0, fmadd)
41845 }
41846}
41847
41848/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41849///
41850/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41851/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41852/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41853/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41854/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41855/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41856///
41857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
41858#[inline]
41859#[target_feature(enable = "avx512f")]
41860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41861#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41862#[rustc_legacy_const_generics(3)]
41863pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
41864 unsafe {
41865 static_assert_rounding!(ROUNDING);
41866 let extracta: f32 = simd_extract!(a, 0);
41867 let extractb: f32 = simd_extract!(b, 0);
41868 let extractc: f32 = simd_extract!(c, 0);
41869 let extractc: f32 = -extractc;
41870 let fmsub: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41871 simd_insert!(a, 0, fmsub)
41872 }
41873}
41874
41875/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41876///
41877/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41878/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41879/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41880/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41881/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41882/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41883///
41884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
41885#[inline]
41886#[target_feature(enable = "avx512f")]
41887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41888#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41889#[rustc_legacy_const_generics(4)]
41890pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
41891 a: __m128,
41892 k: __mmask8,
41893 b: __m128,
41894 c: __m128,
41895) -> __m128 {
41896 unsafe {
41897 static_assert_rounding!(ROUNDING);
41898 let mut fmsub: f32 = simd_extract!(a, 0);
41899 if (k & 0b00000001) != 0 {
41900 let extractb: f32 = simd_extract!(b, 0);
41901 let extractc: f32 = simd_extract!(c, 0);
41902 let extractc: f32 = -extractc;
41903 fmsub = vfmaddssround(a:fmsub, b:extractb, c:extractc, ROUNDING);
41904 }
41905 simd_insert!(a, 0, fmsub)
41906 }
41907}
41908
41909/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41910///
41911/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41912/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41913/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41914/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41915/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41917///
41918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
41919#[inline]
41920#[target_feature(enable = "avx512f")]
41921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41922#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41923#[rustc_legacy_const_generics(4)]
41924pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
41925 k: __mmask8,
41926 a: __m128,
41927 b: __m128,
41928 c: __m128,
41929) -> __m128 {
41930 unsafe {
41931 static_assert_rounding!(ROUNDING);
41932 let mut fmsub: f32 = 0.;
41933 if (k & 0b00000001) != 0 {
41934 let extracta: f32 = simd_extract!(a, 0);
41935 let extractb: f32 = simd_extract!(b, 0);
41936 let extractc: f32 = simd_extract!(c, 0);
41937 let extractc: f32 = -extractc;
41938 fmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41939 }
41940 simd_insert!(a, 0, fmsub)
41941 }
41942}
41943
41944/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
41945///
41946/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41947/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41948/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41949/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41950/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41951/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41952///
41953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
41954#[inline]
41955#[target_feature(enable = "avx512f")]
41956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41957#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41958#[rustc_legacy_const_generics(4)]
41959pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
41960 a: __m128,
41961 b: __m128,
41962 c: __m128,
41963 k: __mmask8,
41964) -> __m128 {
41965 unsafe {
41966 static_assert_rounding!(ROUNDING);
41967 let mut fmsub: f32 = simd_extract!(c, 0);
41968 if (k & 0b00000001) != 0 {
41969 let extracta: f32 = simd_extract!(a, 0);
41970 let extractb: f32 = simd_extract!(b, 0);
41971 let extractc: f32 = -fmsub;
41972 fmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41973 }
41974 simd_insert!(c, 0, fmsub)
41975 }
41976}
41977
41978/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41979///
41980/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41981/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41982/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41983/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41984/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41985/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41986///
41987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
41988#[inline]
41989#[target_feature(enable = "avx512f")]
41990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41991#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41992#[rustc_legacy_const_generics(3)]
41993pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
41994 unsafe {
41995 static_assert_rounding!(ROUNDING);
41996 let extracta: f64 = simd_extract!(a, 0);
41997 let extractb: f64 = simd_extract!(b, 0);
41998 let extractc: f64 = simd_extract!(c, 0);
41999 let extractc: f64 = -extractc;
42000 let fmsub: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42001 simd_insert!(a, 0, fmsub)
42002 }
42003}
42004
42005/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42006///
42007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42013///
42014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
42015#[inline]
42016#[target_feature(enable = "avx512f")]
42017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42018#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42019#[rustc_legacy_const_generics(4)]
42020pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
42021 a: __m128d,
42022 k: __mmask8,
42023 b: __m128d,
42024 c: __m128d,
42025) -> __m128d {
42026 unsafe {
42027 static_assert_rounding!(ROUNDING);
42028 let mut fmsub: f64 = simd_extract!(a, 0);
42029 if (k & 0b00000001) != 0 {
42030 let extractb: f64 = simd_extract!(b, 0);
42031 let extractc: f64 = simd_extract!(c, 0);
42032 let extractc: f64 = -extractc;
42033 fmsub = vfmaddsdround(a:fmsub, b:extractb, c:extractc, ROUNDING);
42034 }
42035 simd_insert!(a, 0, fmsub)
42036 }
42037}
42038
42039/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42040///
42041/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42042/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42043/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42044/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42045/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42046/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42047///
42048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
42049#[inline]
42050#[target_feature(enable = "avx512f")]
42051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42052#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42053#[rustc_legacy_const_generics(4)]
42054pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
42055 k: __mmask8,
42056 a: __m128d,
42057 b: __m128d,
42058 c: __m128d,
42059) -> __m128d {
42060 unsafe {
42061 static_assert_rounding!(ROUNDING);
42062 let mut fmsub: f64 = 0.;
42063 if (k & 0b00000001) != 0 {
42064 let extracta: f64 = simd_extract!(a, 0);
42065 let extractb: f64 = simd_extract!(b, 0);
42066 let extractc: f64 = simd_extract!(c, 0);
42067 let extractc: f64 = -extractc;
42068 fmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42069 }
42070 simd_insert!(a, 0, fmsub)
42071 }
42072}
42073
42074/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42075///
42076/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42077/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42078/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42079/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42080/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42081/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42082///
42083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
42084#[inline]
42085#[target_feature(enable = "avx512f")]
42086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42087#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42088#[rustc_legacy_const_generics(4)]
42089pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
42090 a: __m128d,
42091 b: __m128d,
42092 c: __m128d,
42093 k: __mmask8,
42094) -> __m128d {
42095 unsafe {
42096 static_assert_rounding!(ROUNDING);
42097 let mut fmsub: f64 = simd_extract!(c, 0);
42098 if (k & 0b00000001) != 0 {
42099 let extracta: f64 = simd_extract!(a, 0);
42100 let extractb: f64 = simd_extract!(b, 0);
42101 let extractc: f64 = -fmsub;
42102 fmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42103 }
42104 simd_insert!(c, 0, fmsub)
42105 }
42106}
42107
42108/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
42109///
42110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42111/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42112/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42113/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42114/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42115/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42116///
42117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
42118#[inline]
42119#[target_feature(enable = "avx512f")]
42120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42121#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42122#[rustc_legacy_const_generics(3)]
42123pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
42124 unsafe {
42125 static_assert_rounding!(ROUNDING);
42126 let extracta: f32 = simd_extract!(a, 0);
42127 let extracta: f32 = -extracta;
42128 let extractb: f32 = simd_extract!(b, 0);
42129 let extractc: f32 = simd_extract!(c, 0);
42130 let fnmadd: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42131 simd_insert!(a, 0, fnmadd)
42132 }
42133}
42134
42135/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42136///
42137/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42138/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42139/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42140/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42141/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42142/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42143///
42144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
42145#[inline]
42146#[target_feature(enable = "avx512f")]
42147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42148#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42149#[rustc_legacy_const_generics(4)]
42150pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
42151 a: __m128,
42152 k: __mmask8,
42153 b: __m128,
42154 c: __m128,
42155) -> __m128 {
42156 unsafe {
42157 static_assert_rounding!(ROUNDING);
42158 let mut fnmadd: f32 = simd_extract!(a, 0);
42159 if (k & 0b00000001) != 0 {
42160 let extracta: f32 = -fnmadd;
42161 let extractb: f32 = simd_extract!(b, 0);
42162 let extractc: f32 = simd_extract!(c, 0);
42163 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42164 }
42165 simd_insert!(a, 0, fnmadd)
42166 }
42167}
42168
42169/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42170///
42171/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42172/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42173/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42174/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42175/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42176/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42177///
42178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
42179#[inline]
42180#[target_feature(enable = "avx512f")]
42181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42182#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42183#[rustc_legacy_const_generics(4)]
42184pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
42185 k: __mmask8,
42186 a: __m128,
42187 b: __m128,
42188 c: __m128,
42189) -> __m128 {
42190 unsafe {
42191 static_assert_rounding!(ROUNDING);
42192 let mut fnmadd: f32 = 0.;
42193 if (k & 0b00000001) != 0 {
42194 let extracta: f32 = simd_extract!(a, 0);
42195 let extracta: f32 = -extracta;
42196 let extractb: f32 = simd_extract!(b, 0);
42197 let extractc: f32 = simd_extract!(c, 0);
42198 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42199 }
42200 simd_insert!(a, 0, fnmadd)
42201 }
42202}
42203
42204/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
42205///
42206/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42207/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42208/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42209/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42210/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42211/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42212///
42213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
42214#[inline]
42215#[target_feature(enable = "avx512f")]
42216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42217#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42218#[rustc_legacy_const_generics(4)]
42219pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
42220 a: __m128,
42221 b: __m128,
42222 c: __m128,
42223 k: __mmask8,
42224) -> __m128 {
42225 unsafe {
42226 static_assert_rounding!(ROUNDING);
42227 let mut fnmadd: f32 = simd_extract!(c, 0);
42228 if (k & 0b00000001) != 0 {
42229 let extracta: f32 = simd_extract!(a, 0);
42230 let extracta: f32 = -extracta;
42231 let extractb: f32 = simd_extract!(b, 0);
42232 fnmadd = vfmaddssround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
42233 }
42234 simd_insert!(c, 0, fnmadd)
42235 }
42236}
42237
42238/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
42239///
42240/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42241/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42242/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42243/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42244/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42245/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42246///
42247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
42248#[inline]
42249#[target_feature(enable = "avx512f")]
42250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42251#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42252#[rustc_legacy_const_generics(3)]
42253pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
42254 unsafe {
42255 static_assert_rounding!(ROUNDING);
42256 let extracta: f64 = simd_extract!(a, 0);
42257 let extracta: f64 = -extracta;
42258 let extractb: f64 = simd_extract!(b, 0);
42259 let extractc: f64 = simd_extract!(c, 0);
42260 let fnmadd: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42261 simd_insert!(a, 0, fnmadd)
42262 }
42263}
42264
42265/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42266///
42267/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42268/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42269/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42270/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42271/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42272/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42273///
42274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
42275#[inline]
42276#[target_feature(enable = "avx512f")]
42277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42278#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42279#[rustc_legacy_const_generics(4)]
42280pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
42281 a: __m128d,
42282 k: __mmask8,
42283 b: __m128d,
42284 c: __m128d,
42285) -> __m128d {
42286 unsafe {
42287 static_assert_rounding!(ROUNDING);
42288 let mut fnmadd: f64 = simd_extract!(a, 0);
42289 if (k & 0b00000001) != 0 {
42290 let extracta: f64 = -fnmadd;
42291 let extractb: f64 = simd_extract!(b, 0);
42292 let extractc: f64 = simd_extract!(c, 0);
42293 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42294 }
42295 simd_insert!(a, 0, fnmadd)
42296 }
42297}
42298
42299/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42300///
42301/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42302/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42303/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42304/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42305/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42306/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42307///
42308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
42309#[inline]
42310#[target_feature(enable = "avx512f")]
42311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42312#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42313#[rustc_legacy_const_generics(4)]
42314pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
42315 k: __mmask8,
42316 a: __m128d,
42317 b: __m128d,
42318 c: __m128d,
42319) -> __m128d {
42320 unsafe {
42321 static_assert_rounding!(ROUNDING);
42322 let mut fnmadd: f64 = 0.;
42323 if (k & 0b00000001) != 0 {
42324 let extracta: f64 = simd_extract!(a, 0);
42325 let extracta: f64 = -extracta;
42326 let extractb: f64 = simd_extract!(b, 0);
42327 let extractc: f64 = simd_extract!(c, 0);
42328 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42329 }
42330 simd_insert!(a, 0, fnmadd)
42331 }
42332}
42333
42334/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42335///
42336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42342///
42343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
42344#[inline]
42345#[target_feature(enable = "avx512f")]
42346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42348#[rustc_legacy_const_generics(4)]
42349pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
42350 a: __m128d,
42351 b: __m128d,
42352 c: __m128d,
42353 k: __mmask8,
42354) -> __m128d {
42355 unsafe {
42356 static_assert_rounding!(ROUNDING);
42357 let mut fnmadd: f64 = simd_extract!(c, 0);
42358 if (k & 0b00000001) != 0 {
42359 let extracta: f64 = simd_extract!(a, 0);
42360 let extracta: f64 = -extracta;
42361 let extractb: f64 = simd_extract!(b, 0);
42362 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
42363 }
42364 simd_insert!(c, 0, fnmadd)
42365 }
42366}
42367
42368/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
42369///
42370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42376///
42377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
42378#[inline]
42379#[target_feature(enable = "avx512f")]
42380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42381#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42382#[rustc_legacy_const_generics(3)]
42383pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
42384 unsafe {
42385 static_assert_rounding!(ROUNDING);
42386 let extracta: f32 = simd_extract!(a, 0);
42387 let extracta: f32 = -extracta;
42388 let extractb: f32 = simd_extract!(b, 0);
42389 let extractc: f32 = simd_extract!(c, 0);
42390 let extractc: f32 = -extractc;
42391 let fnmsub: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42392 simd_insert!(a, 0, fnmsub)
42393 }
42394}
42395
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42397///
42398/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42399/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42400/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42401/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42402/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42403/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42404///
42405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
42406#[inline]
42407#[target_feature(enable = "avx512f")]
42408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42409#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42410#[rustc_legacy_const_generics(4)]
42411pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
42412 a: __m128,
42413 k: __mmask8,
42414 b: __m128,
42415 c: __m128,
42416) -> __m128 {
42417 unsafe {
42418 static_assert_rounding!(ROUNDING);
42419 let mut fnmsub: f32 = simd_extract!(a, 0);
42420 if (k & 0b00000001) != 0 {
42421 let extracta: f32 = -fnmsub;
42422 let extractb: f32 = simd_extract!(b, 0);
42423 let extractc: f32 = simd_extract!(c, 0);
42424 let extractc: f32 = -extractc;
42425 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42426 }
42427 simd_insert!(a, 0, fnmsub)
42428 }
42429}
42430
42431/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42432///
42433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42439///
42440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
42441#[inline]
42442#[target_feature(enable = "avx512f")]
42443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42444#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42445#[rustc_legacy_const_generics(4)]
42446pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
42447 k: __mmask8,
42448 a: __m128,
42449 b: __m128,
42450 c: __m128,
42451) -> __m128 {
42452 unsafe {
42453 static_assert_rounding!(ROUNDING);
42454 let mut fnmsub: f32 = 0.;
42455 if (k & 0b00000001) != 0 {
42456 let extracta: f32 = simd_extract!(a, 0);
42457 let extracta: f32 = -extracta;
42458 let extractb: f32 = simd_extract!(b, 0);
42459 let extractc: f32 = simd_extract!(c, 0);
42460 let extractc: f32 = -extractc;
42461 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42462 }
42463 simd_insert!(a, 0, fnmsub)
42464 }
42465}
42466
42467/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
42468///
42469/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42470/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42471/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42472/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42473/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42474/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42475///
42476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
42477#[inline]
42478#[target_feature(enable = "avx512f")]
42479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42480#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42481#[rustc_legacy_const_generics(4)]
42482pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
42483 a: __m128,
42484 b: __m128,
42485 c: __m128,
42486 k: __mmask8,
42487) -> __m128 {
42488 unsafe {
42489 static_assert_rounding!(ROUNDING);
42490 let mut fnmsub: f32 = simd_extract!(c, 0);
42491 if (k & 0b00000001) != 0 {
42492 let extracta: f32 = simd_extract!(a, 0);
42493 let extracta: f32 = -extracta;
42494 let extractb: f32 = simd_extract!(b, 0);
42495 let extractc: f32 = -fnmsub;
42496 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42497 }
42498 simd_insert!(c, 0, fnmsub)
42499 }
42500}
42501
42502/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
42503///
42504/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42505/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42506/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42507/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42508/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42509/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42510///
42511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
42512#[inline]
42513#[target_feature(enable = "avx512f")]
42514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42515#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42516#[rustc_legacy_const_generics(3)]
42517pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
42518 unsafe {
42519 static_assert_rounding!(ROUNDING);
42520 let extracta: f64 = simd_extract!(a, 0);
42521 let extracta: f64 = -extracta;
42522 let extractb: f64 = simd_extract!(b, 0);
42523 let extractc: f64 = simd_extract!(c, 0);
42524 let extractc: f64 = -extractc;
42525 let fnmsub: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42526 simd_insert!(a, 0, fnmsub)
42527 }
42528}
42529
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42531///
42532/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42533/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42534/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42535/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42536/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42537/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42538///
42539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
42540#[inline]
42541#[target_feature(enable = "avx512f")]
42542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42543#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42544#[rustc_legacy_const_generics(4)]
42545pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
42546 a: __m128d,
42547 k: __mmask8,
42548 b: __m128d,
42549 c: __m128d,
42550) -> __m128d {
42551 unsafe {
42552 static_assert_rounding!(ROUNDING);
42553 let mut fnmsub: f64 = simd_extract!(a, 0);
42554 if (k & 0b00000001) != 0 {
42555 let extracta: f64 = -fnmsub;
42556 let extractb: f64 = simd_extract!(b, 0);
42557 let extractc: f64 = simd_extract!(c, 0);
42558 let extractc: f64 = -extractc;
42559 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42560 }
42561 simd_insert!(a, 0, fnmsub)
42562 }
42563}
42564
42565/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42566///
42567/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42573///
42574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
42575#[inline]
42576#[target_feature(enable = "avx512f")]
42577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42578#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42579#[rustc_legacy_const_generics(4)]
42580pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
42581 k: __mmask8,
42582 a: __m128d,
42583 b: __m128d,
42584 c: __m128d,
42585) -> __m128d {
42586 unsafe {
42587 static_assert_rounding!(ROUNDING);
42588 let mut fnmsub: f64 = 0.;
42589 if (k & 0b00000001) != 0 {
42590 let extracta: f64 = simd_extract!(a, 0);
42591 let extracta: f64 = -extracta;
42592 let extractb: f64 = simd_extract!(b, 0);
42593 let extractc: f64 = simd_extract!(c, 0);
42594 let extractc: f64 = -extractc;
42595 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42596 }
42597 simd_insert!(a, 0, fnmsub)
42598 }
42599}
42600
42601/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42602///
42603/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42609///
42610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
42611#[inline]
42612#[target_feature(enable = "avx512f")]
42613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42614#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42615#[rustc_legacy_const_generics(4)]
42616pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
42617 a: __m128d,
42618 b: __m128d,
42619 c: __m128d,
42620 k: __mmask8,
42621) -> __m128d {
42622 unsafe {
42623 static_assert_rounding!(ROUNDING);
42624 let mut fnmsub: f64 = simd_extract!(c, 0);
42625 if (k & 0b00000001) != 0 {
42626 let extracta: f64 = simd_extract!(a, 0);
42627 let extracta: f64 = -extracta;
42628 let extractb: f64 = simd_extract!(b, 0);
42629 let extractc: f64 = -fnmsub;
42630 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42631 }
42632 simd_insert!(c, 0, fnmsub)
42633 }
42634}
42635
42636/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42637///
42638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
42639#[inline]
42640#[target_feature(enable = "avx512f")]
42641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42642#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42643#[rustc_legacy_const_generics(3)]
42644pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
42645 unsafe {
42646 static_assert_uimm_bits!(IMM8, 8);
42647 let a: Simd = a.as_f32x4();
42648 let b: Simd = b.as_f32x4();
42649 let c: Simd = c.as_i32x4();
42650 let r: Simd = vfixupimmss(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
42651 let fixupimm: f32 = simd_extract!(r, 0);
42652 let r: Simd = simd_insert!(a, 0, fixupimm);
42653 transmute(src:r)
42654 }
42655}
42656
42657/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42658///
42659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
42660#[inline]
42661#[target_feature(enable = "avx512f")]
42662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42663#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42664#[rustc_legacy_const_generics(4)]
42665pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
42666 a: __m128,
42667 k: __mmask8,
42668 b: __m128,
42669 c: __m128i,
42670) -> __m128 {
42671 unsafe {
42672 static_assert_uimm_bits!(IMM8, 8);
42673 let a: Simd = a.as_f32x4();
42674 let b: Simd = b.as_f32x4();
42675 let c: Simd = c.as_i32x4();
42676 let fixupimm: Simd = vfixupimmss(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42677 let fixupimm: f32 = simd_extract!(fixupimm, 0);
42678 let r: Simd = simd_insert!(a, 0, fixupimm);
42679 transmute(src:r)
42680 }
42681}
42682
42683/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42684///
42685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
42686#[inline]
42687#[target_feature(enable = "avx512f")]
42688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42689#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42690#[rustc_legacy_const_generics(4)]
42691pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
42692 k: __mmask8,
42693 a: __m128,
42694 b: __m128,
42695 c: __m128i,
42696) -> __m128 {
42697 unsafe {
42698 static_assert_uimm_bits!(IMM8, 8);
42699 let a: Simd = a.as_f32x4();
42700 let b: Simd = b.as_f32x4();
42701 let c: Simd = c.as_i32x4();
42702 let fixupimm: Simd = vfixupimmssz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42703 let fixupimm: f32 = simd_extract!(fixupimm, 0);
42704 let r: Simd = simd_insert!(a, 0, fixupimm);
42705 transmute(src:r)
42706 }
42707}
42708
42709/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42710///
42711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
42712#[inline]
42713#[target_feature(enable = "avx512f")]
42714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42715#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42716#[rustc_legacy_const_generics(3)]
42717pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
42718 unsafe {
42719 static_assert_uimm_bits!(IMM8, 8);
42720 let a: Simd = a.as_f64x2();
42721 let b: Simd = b.as_f64x2();
42722 let c: Simd = c.as_i64x2();
42723 let fixupimm: Simd = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
42724 let fixupimm: f64 = simd_extract!(fixupimm, 0);
42725 let r: Simd = simd_insert!(a, 0, fixupimm);
42726 transmute(src:r)
42727 }
42728}
42729
42730/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42731///
42732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
42733#[inline]
42734#[target_feature(enable = "avx512f")]
42735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42736#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42737#[rustc_legacy_const_generics(4)]
42738pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
42739 a: __m128d,
42740 k: __mmask8,
42741 b: __m128d,
42742 c: __m128i,
42743) -> __m128d {
42744 unsafe {
42745 static_assert_uimm_bits!(IMM8, 8);
42746 let a: Simd = a.as_f64x2();
42747 let b: Simd = b.as_f64x2();
42748 let c: Simd = c.as_i64x2();
42749 let fixupimm: Simd = vfixupimmsd(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42750 let fixupimm: f64 = simd_extract!(fixupimm, 0);
42751 let r: Simd = simd_insert!(a, 0, fixupimm);
42752 transmute(src:r)
42753 }
42754}
42755
42756/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42757///
42758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
42759#[inline]
42760#[target_feature(enable = "avx512f")]
42761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42762#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42763#[rustc_legacy_const_generics(4)]
42764pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
42765 k: __mmask8,
42766 a: __m128d,
42767 b: __m128d,
42768 c: __m128i,
42769) -> __m128d {
42770 unsafe {
42771 static_assert_uimm_bits!(IMM8, 8);
42772 let a: Simd = a.as_f64x2();
42773 let b: Simd = b.as_f64x2();
42774 let c: Simd = c.as_i64x2();
42775 let fixupimm: Simd = vfixupimmsdz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42776 let fixupimm: f64 = simd_extract!(fixupimm, 0);
42777 let r: Simd = simd_insert!(a, 0, fixupimm);
42778 transmute(src:r)
42779 }
42780}
42781
42782/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42784///
42785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
42786#[inline]
42787#[target_feature(enable = "avx512f")]
42788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42789#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42790#[rustc_legacy_const_generics(3, 4)]
42791pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42792 a: __m128,
42793 b: __m128,
42794 c: __m128i,
42795) -> __m128 {
42796 unsafe {
42797 static_assert_uimm_bits!(IMM8, 8);
42798 static_assert_mantissas_sae!(SAE);
42799 let a: Simd = a.as_f32x4();
42800 let b: Simd = b.as_f32x4();
42801 let c: Simd = c.as_i32x4();
42802 let r: Simd = vfixupimmss(a, b, c, IMM8, mask:0b11111111, SAE);
42803 let fixupimm: f32 = simd_extract!(r, 0);
42804 let r: Simd = simd_insert!(a, 0, fixupimm);
42805 transmute(src:r)
42806 }
42807}
42808
42809/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42810/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42811///
42812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
42813#[inline]
42814#[target_feature(enable = "avx512f")]
42815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42816#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42817#[rustc_legacy_const_generics(4, 5)]
42818pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42819 a: __m128,
42820 k: __mmask8,
42821 b: __m128,
42822 c: __m128i,
42823) -> __m128 {
42824 unsafe {
42825 static_assert_uimm_bits!(IMM8, 8);
42826 static_assert_mantissas_sae!(SAE);
42827 let a: Simd = a.as_f32x4();
42828 let b: Simd = b.as_f32x4();
42829 let c: Simd = c.as_i32x4();
42830 let r: Simd = vfixupimmss(a, b, c, IMM8, mask:k, SAE);
42831 let fixupimm: f32 = simd_extract!(r, 0);
42832 let r: Simd = simd_insert!(a, 0, fixupimm);
42833 transmute(src:r)
42834 }
42835}
42836
42837/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42838/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42839///
42840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
42841#[inline]
42842#[target_feature(enable = "avx512f")]
42843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42844#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42845#[rustc_legacy_const_generics(4, 5)]
42846pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42847 k: __mmask8,
42848 a: __m128,
42849 b: __m128,
42850 c: __m128i,
42851) -> __m128 {
42852 unsafe {
42853 static_assert_uimm_bits!(IMM8, 8);
42854 static_assert_mantissas_sae!(SAE);
42855 let a: Simd = a.as_f32x4();
42856 let b: Simd = b.as_f32x4();
42857 let c: Simd = c.as_i32x4();
42858 let r: Simd = vfixupimmssz(a, b, c, IMM8, mask:k, SAE);
42859 let fixupimm: f32 = simd_extract!(r, 0);
42860 let r: Simd = simd_insert!(a, 0, fixupimm);
42861 transmute(src:r)
42862 }
42863}
42864
42865/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42866/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42867///
42868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
42869#[inline]
42870#[target_feature(enable = "avx512f")]
42871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42873#[rustc_legacy_const_generics(3, 4)]
42874pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42875 a: __m128d,
42876 b: __m128d,
42877 c: __m128i,
42878) -> __m128d {
42879 unsafe {
42880 static_assert_uimm_bits!(IMM8, 8);
42881 static_assert_mantissas_sae!(SAE);
42882 let a: Simd = a.as_f64x2();
42883 let b: Simd = b.as_f64x2();
42884 let c: Simd = c.as_i64x2();
42885 let r: Simd = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, SAE);
42886 let fixupimm: f64 = simd_extract!(r, 0);
42887 let r: Simd = simd_insert!(a, 0, fixupimm);
42888 transmute(src:r)
42889 }
42890}
42891
42892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42893/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42894///
42895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
42896#[inline]
42897#[target_feature(enable = "avx512f")]
42898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42899#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42900#[rustc_legacy_const_generics(4, 5)]
42901pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42902 a: __m128d,
42903 k: __mmask8,
42904 b: __m128d,
42905 c: __m128i,
42906) -> __m128d {
42907 unsafe {
42908 static_assert_uimm_bits!(IMM8, 8);
42909 static_assert_mantissas_sae!(SAE);
42910 let a: Simd = a.as_f64x2();
42911 let b: Simd = b.as_f64x2();
42912 let c: Simd = c.as_i64x2();
42913 let r: Simd = vfixupimmsd(a, b, c, IMM8, mask:k, SAE);
42914 let fixupimm: f64 = simd_extract!(r, 0);
42915 let r: Simd = simd_insert!(a, 0, fixupimm);
42916 transmute(src:r)
42917 }
42918}
42919
42920/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42921/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42922///
42923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
42924#[inline]
42925#[target_feature(enable = "avx512f")]
42926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42927#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42928#[rustc_legacy_const_generics(4, 5)]
42929pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42930 k: __mmask8,
42931 a: __m128d,
42932 b: __m128d,
42933 c: __m128i,
42934) -> __m128d {
42935 unsafe {
42936 static_assert_uimm_bits!(IMM8, 8);
42937 static_assert_mantissas_sae!(SAE);
42938 let a: Simd = a.as_f64x2();
42939 let b: Simd = b.as_f64x2();
42940 let c: Simd = c.as_i64x2();
42941 let r: Simd = vfixupimmsdz(a, b, c, IMM8, mask:k, SAE);
42942 let fixupimm: f64 = simd_extract!(r, 0);
42943 let r: Simd = simd_insert!(a, 0, fixupimm);
42944 transmute(src:r)
42945 }
42946}
42947
42948/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42949///
42950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
42951#[inline]
42952#[target_feature(enable = "avx512f")]
42953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42954#[cfg_attr(test, assert_instr(vcvtss2sd))]
42955pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42956 unsafe {
42957 transmute(src:vcvtss2sd(
42958 a.as_f64x2(),
42959 b.as_f32x4(),
42960 src.as_f64x2(),
42961 mask:k,
42962 _MM_FROUND_CUR_DIRECTION,
42963 ))
42964 }
42965}
42966
42967/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42968///
42969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
42970#[inline]
42971#[target_feature(enable = "avx512f")]
42972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42973#[cfg_attr(test, assert_instr(vcvtss2sd))]
42974pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42975 unsafe {
42976 transmute(src:vcvtss2sd(
42977 a.as_f64x2(),
42978 b.as_f32x4(),
42979 src:f64x2::ZERO,
42980 mask:k,
42981 _MM_FROUND_CUR_DIRECTION,
42982 ))
42983 }
42984}
42985
42986/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
42987///
42988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
42989#[inline]
42990#[target_feature(enable = "avx512f")]
42991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42992#[cfg_attr(test, assert_instr(vcvtsd2ss))]
42993pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
42994 unsafe {
42995 transmute(src:vcvtsd2ss(
42996 a.as_f32x4(),
42997 b.as_f64x2(),
42998 src.as_f32x4(),
42999 mask:k,
43000 _MM_FROUND_CUR_DIRECTION,
43001 ))
43002 }
43003}
43004
43005/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
43006///
43007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
43008#[inline]
43009#[target_feature(enable = "avx512f")]
43010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43011#[cfg_attr(test, assert_instr(vcvtsd2ss))]
43012pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43013 unsafe {
43014 transmute(src:vcvtsd2ss(
43015 a.as_f32x4(),
43016 b.as_f64x2(),
43017 src:f32x4::ZERO,
43018 mask:k,
43019 _MM_FROUND_CUR_DIRECTION,
43020 ))
43021 }
43022}
43023
43024/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
43025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43026///
43027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
43028#[inline]
43029#[target_feature(enable = "avx512f")]
43030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43031#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43032#[rustc_legacy_const_generics(2)]
43033pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
43034 unsafe {
43035 static_assert_sae!(SAE);
43036 let a: Simd = a.as_f64x2();
43037 let b: Simd = b.as_f32x4();
43038 let r: Simd = vcvtss2sd(a, b, src:f64x2::ZERO, mask:0b11111111, SAE);
43039 transmute(src:r)
43040 }
43041}
43042
43043/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43044/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43045///
43046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
43047#[inline]
43048#[target_feature(enable = "avx512f")]
43049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43050#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43051#[rustc_legacy_const_generics(4)]
43052pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
43053 src: __m128d,
43054 k: __mmask8,
43055 a: __m128d,
43056 b: __m128,
43057) -> __m128d {
43058 unsafe {
43059 static_assert_sae!(SAE);
43060 let a: Simd = a.as_f64x2();
43061 let b: Simd = b.as_f32x4();
43062 let src: Simd = src.as_f64x2();
43063 let r: Simd = vcvtss2sd(a, b, src, mask:k, SAE);
43064 transmute(src:r)
43065 }
43066}
43067
43068/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43070///
43071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
43072#[inline]
43073#[target_feature(enable = "avx512f")]
43074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43075#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43076#[rustc_legacy_const_generics(3)]
43077pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
43078 unsafe {
43079 static_assert_sae!(SAE);
43080 let a: Simd = a.as_f64x2();
43081 let b: Simd = b.as_f32x4();
43082 let r: Simd = vcvtss2sd(a, b, src:f64x2::ZERO, mask:k, SAE);
43083 transmute(src:r)
43084 }
43085}
43086
43087/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43088/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43089/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43090/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43091/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43092/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43093/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43094///
43095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
43096#[inline]
43097#[target_feature(enable = "avx512f")]
43098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43099#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43100#[rustc_legacy_const_generics(2)]
43101pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
43102 unsafe {
43103 static_assert_rounding!(ROUNDING);
43104 let a: Simd = a.as_f32x4();
43105 let b: Simd = b.as_f64x2();
43106 let r: Simd = vcvtsd2ss(a, b, src:f32x4::ZERO, mask:0b11111111, ROUNDING);
43107 transmute(src:r)
43108 }
43109}
43110
43111/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43112/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43113/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43114/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43115/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43116/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43117/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43118///
43119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
43120#[inline]
43121#[target_feature(enable = "avx512f")]
43122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43123#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43124#[rustc_legacy_const_generics(4)]
43125pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
43126 src: __m128,
43127 k: __mmask8,
43128 a: __m128,
43129 b: __m128d,
43130) -> __m128 {
43131 unsafe {
43132 static_assert_rounding!(ROUNDING);
43133 let a: Simd = a.as_f32x4();
43134 let b: Simd = b.as_f64x2();
43135 let src: Simd = src.as_f32x4();
43136 let r: Simd = vcvtsd2ss(a, b, src, mask:k, ROUNDING);
43137 transmute(src:r)
43138 }
43139}
43140
43141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43142/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43143/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43144/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43145/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43146/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43147/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43148///
43149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
43150#[inline]
43151#[target_feature(enable = "avx512f")]
43152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43153#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43154#[rustc_legacy_const_generics(3)]
43155pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43156 unsafe {
43157 static_assert_rounding!(ROUNDING);
43158 let a: Simd = a.as_f32x4();
43159 let b: Simd = b.as_f64x2();
43160 let r: Simd = vcvtsd2ss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
43161 transmute(src:r)
43162 }
43163}
43164
43165/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43166/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43167/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43168/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43169/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43170/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43171/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43172///
43173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
43174#[inline]
43175#[target_feature(enable = "avx512f")]
43176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43177#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
43178#[rustc_legacy_const_generics(1)]
43179pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
43180 unsafe {
43181 static_assert_rounding!(ROUNDING);
43182 let a: Simd = a.as_f32x4();
43183 vcvtss2si(a, ROUNDING)
43184 }
43185}
43186
43187/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43188/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43189/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43190/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43191/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43192/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43193/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43194///
43195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
43196#[inline]
43197#[target_feature(enable = "avx512f")]
43198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43199#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
43200#[rustc_legacy_const_generics(1)]
43201pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
43202 unsafe {
43203 static_assert_rounding!(ROUNDING);
43204 let a: Simd = a.as_f32x4();
43205 vcvtss2si(a, ROUNDING)
43206 }
43207}
43208
43209/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
43210/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43211/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43212/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43213/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43214/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43215/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43216///
43217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
43218#[inline]
43219#[target_feature(enable = "avx512f")]
43220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43221#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
43222#[rustc_legacy_const_generics(1)]
43223pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
43224 unsafe {
43225 static_assert_rounding!(ROUNDING);
43226 let a: Simd = a.as_f32x4();
43227 vcvtss2usi(a, ROUNDING)
43228 }
43229}
43230
43231/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43232///
43233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
43234#[inline]
43235#[target_feature(enable = "avx512f")]
43236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43237#[cfg_attr(test, assert_instr(vcvtss2si))]
43238pub fn _mm_cvtss_i32(a: __m128) -> i32 {
43239 unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43240}
43241
43242/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43243///
43244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
43245#[inline]
43246#[target_feature(enable = "avx512f")]
43247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43248#[cfg_attr(test, assert_instr(vcvtss2usi))]
43249pub fn _mm_cvtss_u32(a: __m128) -> u32 {
43250 unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43251}
43252
43253/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43254/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43255/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43256/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43257/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43258/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43259/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43260///
43261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
43262#[inline]
43263#[target_feature(enable = "avx512f")]
43264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43265#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
43266#[rustc_legacy_const_generics(1)]
43267pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
43268 unsafe {
43269 static_assert_rounding!(ROUNDING);
43270 let a: Simd = a.as_f64x2();
43271 vcvtsd2si(a, ROUNDING)
43272 }
43273}
43274
43275/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43276/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43277/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43278/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43279/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43280/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43281/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43282///
43283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
43284#[inline]
43285#[target_feature(enable = "avx512f")]
43286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43287#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
43288#[rustc_legacy_const_generics(1)]
43289pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
43290 unsafe {
43291 static_assert_rounding!(ROUNDING);
43292 let a: Simd = a.as_f64x2();
43293 vcvtsd2si(a, ROUNDING)
43294 }
43295}
43296
43297/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
43298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43304///
43305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
43306#[inline]
43307#[target_feature(enable = "avx512f")]
43308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43309#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
43310#[rustc_legacy_const_generics(1)]
43311pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
43312 unsafe {
43313 static_assert_rounding!(ROUNDING);
43314 let a: Simd = a.as_f64x2();
43315 vcvtsd2usi(a, ROUNDING)
43316 }
43317}
43318
43319/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43320///
43321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
43322#[inline]
43323#[target_feature(enable = "avx512f")]
43324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43325#[cfg_attr(test, assert_instr(vcvtsd2si))]
43326pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
43327 unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43328}
43329
43330/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43331///
43332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
43333#[inline]
43334#[target_feature(enable = "avx512f")]
43335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43336#[cfg_attr(test, assert_instr(vcvtsd2usi))]
43337pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
43338 unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43339}
43340
43341/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43342///
43343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43349///
43350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
43351#[inline]
43352#[target_feature(enable = "avx512f")]
43353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43354#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
43355#[rustc_legacy_const_generics(2)]
43356pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
43357 unsafe {
43358 static_assert_rounding!(ROUNDING);
43359 let a: Simd = a.as_f32x4();
43360 let r: Simd = vcvtsi2ss(a, b, ROUNDING);
43361 transmute(src:r)
43362 }
43363}
43364
43365/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43366///
43367/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43368/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43369/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43370/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43371/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43372/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43373///
43374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
43375#[inline]
43376#[target_feature(enable = "avx512f")]
43377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43378#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
43379#[rustc_legacy_const_generics(2)]
43380pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
43381 unsafe {
43382 static_assert_rounding!(ROUNDING);
43383 let a: Simd = a.as_f32x4();
43384 let r: Simd = vcvtsi2ss(a, b, ROUNDING);
43385 transmute(src:r)
43386 }
43387}
43388
43389/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43390/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43391/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43392/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43393/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43394/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43396///
43397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
43398#[inline]
43399#[target_feature(enable = "avx512f")]
43400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43401#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
43402#[rustc_legacy_const_generics(2)]
43403pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
43404 unsafe {
43405 static_assert_rounding!(ROUNDING);
43406 let a: Simd = a.as_f32x4();
43407 let r: Simd = vcvtusi2ss(a, b, ROUNDING);
43408 transmute(src:r)
43409 }
43410}
43411
43412/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
43413///
43414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
43415#[inline]
43416#[target_feature(enable = "avx512f")]
43417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43418#[cfg_attr(test, assert_instr(vcvtsi2ss))]
43419#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43420pub const fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
43421 unsafe {
43422 let b: f32 = b as f32;
43423 simd_insert!(a, 0, b)
43424 }
43425}
43426
43427/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
43428///
43429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
43430#[inline]
43431#[target_feature(enable = "avx512f")]
43432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43433#[cfg_attr(test, assert_instr(vcvtsi2sd))]
43434#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43435pub const fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
43436 unsafe {
43437 let b: f64 = b as f64;
43438 simd_insert!(a, 0, b)
43439 }
43440}
43441
43442/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43443/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43444///
43445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
43446#[inline]
43447#[target_feature(enable = "avx512f")]
43448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43449#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
43450#[rustc_legacy_const_generics(1)]
43451pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
43452 unsafe {
43453 static_assert_sae!(SAE);
43454 let a: Simd = a.as_f32x4();
43455 vcvttss2si(a, SAE)
43456 }
43457}
43458
43459/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43460/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43461///
43462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
43463#[inline]
43464#[target_feature(enable = "avx512f")]
43465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43466#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
43467#[rustc_legacy_const_generics(1)]
43468pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
43469 unsafe {
43470 static_assert_sae!(SAE);
43471 let a: Simd = a.as_f32x4();
43472 vcvttss2si(a, SAE)
43473 }
43474}
43475
43476/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
43477/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43478///
43479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
43480#[inline]
43481#[target_feature(enable = "avx512f")]
43482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43483#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
43484#[rustc_legacy_const_generics(1)]
43485pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
43486 unsafe {
43487 static_assert_sae!(SAE);
43488 let a: Simd = a.as_f32x4();
43489 vcvttss2usi(a, SAE)
43490 }
43491}
43492
43493/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
43494///
43495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
43496#[inline]
43497#[target_feature(enable = "avx512f")]
43498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43499#[cfg_attr(test, assert_instr(vcvttss2si))]
43500pub fn _mm_cvttss_i32(a: __m128) -> i32 {
43501 unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43502}
43503
43504/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
43505///
43506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
43507#[inline]
43508#[target_feature(enable = "avx512f")]
43509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43510#[cfg_attr(test, assert_instr(vcvttss2usi))]
43511pub fn _mm_cvttss_u32(a: __m128) -> u32 {
43512 unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43513}
43514
43515/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43516/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43517///
43518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
43519#[inline]
43520#[target_feature(enable = "avx512f")]
43521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43522#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
43523#[rustc_legacy_const_generics(1)]
43524pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
43525 unsafe {
43526 static_assert_sae!(SAE);
43527 let a: Simd = a.as_f64x2();
43528 vcvttsd2si(a, SAE)
43529 }
43530}
43531
43532/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43533/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43534///
43535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
43536#[inline]
43537#[target_feature(enable = "avx512f")]
43538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43539#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
43540#[rustc_legacy_const_generics(1)]
43541pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
43542 unsafe {
43543 static_assert_sae!(SAE);
43544 let a: Simd = a.as_f64x2();
43545 vcvttsd2si(a, SAE)
43546 }
43547}
43548
43549/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
43550/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43551///
43552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
43553#[inline]
43554#[target_feature(enable = "avx512f")]
43555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43556#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
43557#[rustc_legacy_const_generics(1)]
43558pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
43559 unsafe {
43560 static_assert_sae!(SAE);
43561 let a: Simd = a.as_f64x2();
43562 vcvttsd2usi(a, SAE)
43563 }
43564}
43565
43566/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
43567///
43568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
43569#[inline]
43570#[target_feature(enable = "avx512f")]
43571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43572#[cfg_attr(test, assert_instr(vcvttsd2si))]
43573pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
43574 unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43575}
43576
43577/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
43578///
43579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
43580#[inline]
43581#[target_feature(enable = "avx512f")]
43582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43583#[cfg_attr(test, assert_instr(vcvttsd2usi))]
43584pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
43585 unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43586}
43587
43588/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
43589///
43590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
43591#[inline]
43592#[target_feature(enable = "avx512f")]
43593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43594#[cfg_attr(test, assert_instr(vcvtusi2ss))]
43595#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43596pub const fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
43597 unsafe {
43598 let b: f32 = b as f32;
43599 simd_insert!(a, 0, b)
43600 }
43601}
43602
43603/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
43604///
43605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
43606#[inline]
43607#[target_feature(enable = "avx512f")]
43608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43609#[cfg_attr(test, assert_instr(vcvtusi2sd))]
43610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43611pub const fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
43612 unsafe {
43613 let b: f64 = b as f64;
43614 simd_insert!(a, 0, b)
43615 }
43616}
43617
43618/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
43619/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43620///
43621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
43622#[inline]
43623#[target_feature(enable = "avx512f")]
43624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43625#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
43626#[rustc_legacy_const_generics(2, 3)]
43627pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
43628 unsafe {
43629 static_assert_uimm_bits!(IMM5, 5);
43630 static_assert_mantissas_sae!(SAE);
43631 let a: Simd = a.as_f32x4();
43632 let b: Simd = b.as_f32x4();
43633 vcomiss(a, b, IMM5, SAE)
43634 }
43635}
43636
43637/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
43638/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43639///
43640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
43641#[inline]
43642#[target_feature(enable = "avx512f")]
43643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43644#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
43645#[rustc_legacy_const_generics(2, 3)]
43646pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
43647 unsafe {
43648 static_assert_uimm_bits!(IMM5, 5);
43649 static_assert_mantissas_sae!(SAE);
43650 let a: Simd = a.as_f64x2();
43651 let b: Simd = b.as_f64x2();
43652 vcomisd(a, b, IMM5, SAE)
43653 }
43654}
43655
// Integer-comparison predicates for the masked `_mm*_cmp_ep*_mask`
// intrinsics: a 3-bit encoding (0x00..=0x07) passed as the immediate operand.

/// Equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
/// Less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
/// Less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
/// False (always 0)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
/// Not-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
/// Not less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
/// Not less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
/// True (always 1)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
43680
// Normalization-interval selectors for the `getmant` family of intrinsics:
// they choose the interval into which the extracted mantissa is normalized.

/// interval [1, 2)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
/// interval [0.5, 2)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
/// interval [0.5, 1)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
/// interval [0.75, 1.5)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
43693
// Sign-control selectors for the `getmant` family of intrinsics: they choose
// how the sign of the extracted mantissa is derived from the source.

/// sign = sign(SRC)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
/// sign = 0
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
/// DEST = NaN if sign(SRC) = 1
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
43703
// Shuffle-control constants for the `shuffle_epi32` family. Each constant
// packs four 2-bit lane selectors (A = 0, B = 1, C = 2, D = 3); the first
// letter of the name occupies the most significant pair of bits, e.g.
// `_MM_PERM_ABCD` = 0b00_01_10_11 = 0x1B.
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
43782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43783pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
43784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43785pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
43786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43787pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
43788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43789pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
43790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43791pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
43792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43793pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
43794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43795pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
43796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43797pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
43798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43799pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
43800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43801pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
43802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43803pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
43804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43805pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
43806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43807pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
43808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43809pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
43810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43811pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
43812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43813pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
43814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43815pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
43816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43817pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
43818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43819pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
43820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43821pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
43822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43823pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
43824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43825pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
43826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43827pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
43828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43829pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
43830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43831pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
43832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43833pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
43834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43835pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
43836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43837pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
43838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43839pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
43840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43841pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
43842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43843pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
43844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43845pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
43846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43847pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
43848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43849pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
43850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43851pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
43852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43853pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
43854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43855pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
43856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43857pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
43858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43859pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
43860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43861pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
43862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43863pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
43864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43865pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
43866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43867pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
43868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43869pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
43870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43871pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
43872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43873pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
43874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43875pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
43876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43877pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
43878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43879pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
43880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43881pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
43882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43883pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
43884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43885pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
43886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43887pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
43888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43889pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
43890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43891pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
43892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43893pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
43894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43895pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
43896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43897pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
43898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43899pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
43900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43901pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
43902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43903pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
43904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43905pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
43906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43907pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
43908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43909pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
43910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43911pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
43912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43913pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
43914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43915pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
43916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43917pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
43918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43919pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
43920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43921pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
43922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43923pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
43924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43925pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
43926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43927pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
43928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43929pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
43930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43931pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
43932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43933pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
43934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43935pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
43936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43937pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
43938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43939pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
43940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43941pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
43942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43943pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
43944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43945pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
43946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43947pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
43948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43949pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
43950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43951pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
43952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43953pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
43954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43955pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
43956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43957pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
43958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43959pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
43960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43961pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
43962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43963pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
43964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43965pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
43966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43967pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
43968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43969pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
43970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43971pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
43972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43973pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
43974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43975pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
43976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43977pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
43978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43979pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
43980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43981pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
43982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43983pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
43984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43985pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
43986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43987pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
43988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43989pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
43990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43991pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
43992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43993pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
43994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43995pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
43996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43997pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
43998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43999pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
44000#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44001pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
44002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44003pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
44004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44005pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
44006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44007pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
44008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44009pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
44010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44011pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
44012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44013pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
44014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44015pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
44016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44017pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
44018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44019pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
44020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44021pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
44022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44023pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
44024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44025pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
44026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44027pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
44028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44029pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
44030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44031pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
44032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44033pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
44034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44035pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
44036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44037pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
44038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44039pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
44040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44041pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
44042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44043pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
44044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44045pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
44046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44047pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
44048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44049pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
44050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44051pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
44052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44053pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
44054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44055pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
44056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44057pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
44058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44059pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
44060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44061pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
44062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44063pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
44064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44065pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
44066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44067pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
44068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44069pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
44070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44071pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
44072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44073pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
44074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44075pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
44076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44077pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
44078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44079pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
44080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44081pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
44082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44083pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
44084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44085pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
44086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44087pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
44088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44089pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
44090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44091pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
44092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44093pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
44094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44095pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
44096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44097pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
44098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44099pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
44100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44101pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
44102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44103pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
44104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44105pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
44106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44107pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
44108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44109pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
44110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44111pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
44112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44113pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
44114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44115pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
44116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44117pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
44118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44119pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
44120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44121pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
44122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44123pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
44124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44125pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
44126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44127pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
44128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44129pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
44130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44131pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
44132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44133pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
44134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44135pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
44136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44137pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
44138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44139pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
44140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44141pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
44142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44143pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
44144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44145pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
44146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44147pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
44148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44149pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
44150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44151pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
44152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44153pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
44154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44155pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
44156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44157pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
44158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44159pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
44160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44161pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
44162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44163pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
44164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44165pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
44166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44167pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
44168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44169pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
44170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44171pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
44172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44173pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
44174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44175pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
44176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44177pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
44178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44179pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
44180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44181pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
44182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44183pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
44184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44185pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
44186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44187pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
44188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44189pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
44190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44191pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
44192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44193pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
44194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44195pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
44196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44197pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
44198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44199pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
44200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44201pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
44202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44203pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
44204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44205pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
44206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44207pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
44208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44209pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
44210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44211pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
44212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44213pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
44214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44215pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
44216
44217#[allow(improper_ctypes)]
44218unsafe extern "C" {
44219 #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
44220 unsafefn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
44221 #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
44222 unsafefn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
44223
44224 #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
44225 unsafefn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
44226 #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
44227 unsafefn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
44228
44229 #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
44230 unsafefn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
44231 #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
44232 unsafefn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
44233
44234 #[link_name = "llvm.x86.avx512.add.ps.512"]
44235 unsafefn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44236 #[link_name = "llvm.x86.avx512.add.pd.512"]
44237 unsafefn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44238 #[link_name = "llvm.x86.avx512.sub.ps.512"]
44239 unsafefn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44240 #[link_name = "llvm.x86.avx512.sub.pd.512"]
44241 unsafefn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44242 #[link_name = "llvm.x86.avx512.mul.ps.512"]
44243 unsafefn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44244 #[link_name = "llvm.x86.avx512.mul.pd.512"]
44245 unsafefn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44246 #[link_name = "llvm.x86.avx512.div.ps.512"]
44247 unsafefn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44248 #[link_name = "llvm.x86.avx512.div.pd.512"]
44249 unsafefn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44250
44251 #[link_name = "llvm.x86.avx512.max.ps.512"]
44252 unsafefn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44253 #[link_name = "llvm.x86.avx512.max.pd.512"]
44254 unsafefn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44255 #[link_name = "llvm.x86.avx512.min.ps.512"]
44256 unsafefn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44257 #[link_name = "llvm.x86.avx512.min.pd.512"]
44258 unsafefn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44259
44260 #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
44261 unsafefn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
44262
44263 #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
44264 unsafefn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44265 #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
44266 unsafefn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44267
44268 #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
44269 unsafefn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
44270 #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
44271 unsafefn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44272 #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
44273 unsafefn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44274
44275 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
44276 unsafefn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
44277 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
44278 unsafefn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
44279 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
44280 unsafefn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
44281
44282 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
44283 unsafefn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
44284 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
44285 unsafefn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
44286 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
44287 unsafefn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
44288
44289 #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
44290 unsafefn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
44291 #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
44292 unsafefn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
44293 #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
44294 unsafefn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44295
44296 #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
44297 unsafefn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
44298 #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
44299 unsafefn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
44300 #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
44301 unsafefn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44302
44303 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
44304 unsafefn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44305 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
44306 unsafefn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44307 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
44308 unsafefn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44309
44310 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
44311 unsafefn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44312 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
44313 unsafefn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44314 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
44315 unsafefn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44316
44317 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
44318 unsafefn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44319 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
44320 unsafefn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44321 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
44322 unsafefn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44323
44324 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
44325 unsafefn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44326 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
44327 unsafefn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44328 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
44329 unsafefn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44330
44331 #[link_name = "llvm.x86.avx512.pternlog.d.512"]
44332 unsafefn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
44333 #[link_name = "llvm.x86.avx512.pternlog.d.256"]
44334 unsafefn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
44335 #[link_name = "llvm.x86.avx512.pternlog.d.128"]
44336 unsafefn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
44337
44338 #[link_name = "llvm.x86.avx512.pternlog.q.512"]
44339 unsafefn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
44340 #[link_name = "llvm.x86.avx512.pternlog.q.256"]
44341 unsafefn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
44342 #[link_name = "llvm.x86.avx512.pternlog.q.128"]
44343 unsafefn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
44344
44345 #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
44346 unsafefn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
44347 #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
44348 unsafefn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
44349 #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
44350 unsafefn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
44351
44352 #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
44353 unsafefn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
44354 #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
44355 unsafefn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
44356 #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
44357 unsafefn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
44358
44359 #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
44360 unsafefn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44361 #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
44362 unsafefn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44363 #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
44364 unsafefn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44365
44366 #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
44367 unsafefn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44368 #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
44369 unsafefn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44370 #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
44371 unsafefn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44372
44373 #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
44374 unsafefn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44375 #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
44376 unsafefn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44377 #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
44378 unsafefn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44379
44380 #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
44381 unsafefn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44382 #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
44383 unsafefn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44384 #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
44385 unsafefn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44386
44387 #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
44388 unsafefn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
44389
44390 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
44391 unsafefn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
44392 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
44393 unsafefn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
44394 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
44395 unsafefn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
44396
44397 #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
44398 unsafefn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
44399 #[link_name = "llvm.x86.avx512.mask.cvtpd2ps"]
44400 unsafefn vcvtpd2ps128(a: f64x2, src: f32x4, mask: u8) -> f32x4;
44401 #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
44402 unsafefn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
44403
44404 #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.128"]
44405 unsafefn vcvtpd2dq128(a: f64x2, src: i32x4, k: u8) -> i32x4;
44406 #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
44407 unsafefn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
44408
44409 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
44410 unsafefn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
44411 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
44412 unsafefn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
44413 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
44414 unsafefn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
44415
44416 #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
44417 unsafefn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
44418 #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
44419 unsafefn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
44420
44421 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
44422 unsafefn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
44423 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
44424 unsafefn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
44425 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
44426 unsafefn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
44427
44428 #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
44429 unsafefn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
44430
44431 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
44432 unsafefn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
44433 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
44434 unsafefn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
44435 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
44436 unsafefn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
44437
44438 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
44439 unsafefn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
44440 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
44441 unsafefn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
44442 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
44443 unsafefn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
44444
44445 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
44446 unsafefn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
44447 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
44448 unsafefn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
44449 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
44450 unsafefn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
44451
44452 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
44453 unsafefn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
44454 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
44455 unsafefn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
44456 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
44457 unsafefn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
44458
44459 #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
44460 unsafefn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
44461 #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
44462 unsafefn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
44463 #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
44464 unsafefn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
44465
44466 #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
44467 unsafefn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
44468 #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
44469 unsafefn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
44470 #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
44471 unsafefn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
44472 #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
44473 unsafefn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
44474 #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
44475 unsafefn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
44476
44477 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
44478 unsafefn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44479 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
44480 unsafefn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44481 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
44482 unsafefn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44483
44484 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
44485 unsafefn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44486 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
44487 unsafefn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44488 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
44489 unsafefn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44490
44491 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
44492 unsafefn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44493 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
44494 unsafefn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44495 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
44496 unsafefn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44497
44498 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
44499 unsafefn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44500 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
44501 unsafefn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44502 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
44503 unsafefn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44504
44505 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
44506 unsafefn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44507 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
44508 unsafefn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44509 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
44510 unsafefn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44511
44512 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
44513 unsafefn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44514 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
44515 unsafefn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44516 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
44517 unsafefn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44518
44519 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
44520 unsafefn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44521 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
44522 unsafefn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44523 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
44524 unsafefn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44525
44526 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
44527 unsafefn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44528 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
44529 unsafefn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44530 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
44531 unsafefn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44532
44533 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
44534 unsafefn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44535 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
44536 unsafefn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44537 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
44538 unsafefn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44539
44540 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
44541 unsafefn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44542 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
44543 unsafefn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44544 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
44545 unsafefn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44546
44547 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
44548 unsafefn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44549 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
44550 unsafefn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44551 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
44552 unsafefn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44553
44554 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
44555 unsafefn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44556 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
44557 unsafefn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44558 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
44559 unsafefn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44560
44561 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
44562 unsafefn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44563 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
44564 unsafefn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44565 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
44566 unsafefn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44567
44568 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
44569 unsafefn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44570 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
44571 unsafefn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44572 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
44573 unsafefn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44574
44575 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
44576 unsafefn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44577 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
44578 unsafefn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44579 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
44580 unsafefn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44581
44582 #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
44583 unsafefn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
44584
44585 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
44586 unsafefn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
44587 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
44588 unsafefn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
44589 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
44590 unsafefn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
44591
44592 #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
44593 unsafefn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
44594 #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
44595 unsafefn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
44596 #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
44597 unsafefn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
44598
44599 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
44600 unsafefn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
44601 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
44602 unsafefn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
44603 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
44604 unsafefn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
44605
44606 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
44607 unsafefn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
44608 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
44609 unsafefn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
44610 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
44611 unsafefn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
44612
44613 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
44614 unsafefn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
44615 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
44616 unsafefn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
44617 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
44618 unsafefn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
44619
44620 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
44621 unsafefn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
44622 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
44623 unsafefn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
44624 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
44625 unsafefn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
44626
44627 #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
44628 unsafefn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
44629 #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
44630 unsafefn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
44631 #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
44632 unsafefn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
44633
44634 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
44635 unsafefn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
44636 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
44637 unsafefn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
44638 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
44639 unsafefn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
44640
44641 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
44642 unsafefn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
44643 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
44644 unsafefn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
44645 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
44646 unsafefn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
44647
44648 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
44649 unsafefn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
44650 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
44651 unsafefn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
44652 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
44653 unsafefn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
44654
44655 #[link_name = "llvm.x86.avx512.gather.dpd.512"]
44656 unsafefn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
44657 #[link_name = "llvm.x86.avx512.gather.dps.512"]
44658 unsafefn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
44659 #[link_name = "llvm.x86.avx512.gather.qpd.512"]
44660 unsafefn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
44661 #[link_name = "llvm.x86.avx512.gather.qps.512"]
44662 unsafefn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
44663 #[link_name = "llvm.x86.avx512.gather.dpq.512"]
44664 unsafefn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
44665 #[link_name = "llvm.x86.avx512.gather.dpi.512"]
44666 unsafefn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
44667 #[link_name = "llvm.x86.avx512.gather.qpq.512"]
44668 unsafefn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
44669 #[link_name = "llvm.x86.avx512.gather.qpi.512"]
44670 unsafefn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
44671
44672 #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
44673 unsafefn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
44674 #[link_name = "llvm.x86.avx512.scatter.dps.512"]
44675 unsafefn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
44676 #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
44677 unsafefn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
44678 #[link_name = "llvm.x86.avx512.scatter.qps.512"]
44679 unsafefn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
44680 #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
44681 unsafefn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
44682
44683 #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
44684 unsafefn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
44685 #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
44686 unsafefn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
44687 #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
44688 unsafefn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
44689
44690 #[link_name = "llvm.x86.avx512.scattersiv4.si"]
44691 unsafefn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
44692 #[link_name = "llvm.x86.avx512.scattersiv2.di"]
44693 unsafefn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
44694 #[link_name = "llvm.x86.avx512.scattersiv2.df"]
44695 unsafefn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
44696 #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
44697 unsafefn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
44698 #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
44699 unsafefn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
44700 #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
44701 unsafefn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
44702 #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
44703 unsafefn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
44704 #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
44705 unsafefn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
44706
44707 #[link_name = "llvm.x86.avx512.scattersiv8.si"]
44708 unsafefn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
44709 #[link_name = "llvm.x86.avx512.scattersiv4.di"]
44710 unsafefn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
44711 #[link_name = "llvm.x86.avx512.scattersiv4.df"]
44712 unsafefn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
44713 #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
44714 unsafefn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
44715 #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
44716 unsafefn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
44717 #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
44718 unsafefn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
44719 #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
44720 unsafefn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
44721 #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
44722 unsafefn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
44723
44724 #[link_name = "llvm.x86.avx512.gather3siv4.si"]
44725 unsafefn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
44726 #[link_name = "llvm.x86.avx512.gather3siv2.di"]
44727 unsafefn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
44728 #[link_name = "llvm.x86.avx512.gather3siv2.df"]
44729 unsafefn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
44730 #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
44731 unsafefn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
44732 #[link_name = "llvm.x86.avx512.gather3div4.si"]
44733 unsafefn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
44734 #[link_name = "llvm.x86.avx512.gather3div2.di"]
44735 unsafefn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
44736 #[link_name = "llvm.x86.avx512.gather3div2.df"]
44737 unsafefn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
44738 #[link_name = "llvm.x86.avx512.gather3div4.sf"]
44739 unsafefn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
44740
44741 #[link_name = "llvm.x86.avx512.gather3siv8.si"]
44742 unsafefn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
44743 #[link_name = "llvm.x86.avx512.gather3siv4.di"]
44744 unsafefn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
44745 #[link_name = "llvm.x86.avx512.gather3siv4.df"]
44746 unsafefn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
44747 #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
44748 unsafefn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
44749 #[link_name = "llvm.x86.avx512.gather3div8.si"]
44750 unsafefn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
44751 #[link_name = "llvm.x86.avx512.gather3div4.di"]
44752 unsafefn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
44753 #[link_name = "llvm.x86.avx512.gather3div4.df"]
44754 unsafefn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
44755 #[link_name = "llvm.x86.avx512.gather3div8.sf"]
44756 unsafefn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
44757
44758 #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
44759 unsafefn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
44760 #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
44761 unsafefn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
44762
44763 #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
44764 unsafefn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
44765 #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
44766 unsafefn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
44767 #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
44768 unsafefn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
44769
44770 #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
44771 unsafefn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
44772 #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
44773 unsafefn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
44774 #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
44775 unsafefn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
44776
44777 #[link_name = "llvm.x86.avx512.psll.d.512"]
44778 unsafefn vpslld(a: i32x16, count: i32x4) -> i32x16;
44779 #[link_name = "llvm.x86.avx512.psrl.d.512"]
44780 unsafefn vpsrld(a: i32x16, count: i32x4) -> i32x16;
44781 #[link_name = "llvm.x86.avx512.psll.q.512"]
44782 unsafefn vpsllq(a: i64x8, count: i64x2) -> i64x8;
44783 #[link_name = "llvm.x86.avx512.psrl.q.512"]
44784 unsafefn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
44785
44786 #[link_name = "llvm.x86.avx512.psra.d.512"]
44787 unsafefn vpsrad(a: i32x16, count: i32x4) -> i32x16;
44788
44789 #[link_name = "llvm.x86.avx512.psra.q.512"]
44790 unsafefn vpsraq(a: i64x8, count: i64x2) -> i64x8;
44791 #[link_name = "llvm.x86.avx512.psra.q.256"]
44792 unsafefn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
44793 #[link_name = "llvm.x86.avx512.psra.q.128"]
44794 unsafefn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
44795
44796 #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
44797 unsafefn vpermilps(a: f32x16, b: i32x16) -> f32x16;
44798 #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
44799 unsafefn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
44800
44801 #[link_name = "llvm.x86.avx512.permvar.si.512"]
44802 unsafefn vpermd(a: i32x16, idx: i32x16) -> i32x16;
44803
44804 #[link_name = "llvm.x86.avx512.permvar.di.512"]
44805 unsafefn vpermq(a: i64x8, idx: i64x8) -> i64x8;
44806 #[link_name = "llvm.x86.avx512.permvar.di.256"]
44807 unsafefn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
44808
44809 #[link_name = "llvm.x86.avx512.permvar.sf.512"]
44810 unsafefn vpermps(a: f32x16, idx: i32x16) -> f32x16;
44811
44812 #[link_name = "llvm.x86.avx512.permvar.df.512"]
44813 unsafefn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
44814 #[link_name = "llvm.x86.avx512.permvar.df.256"]
44815 unsafefn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
44816
44817 #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
44818 unsafefn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
44819 #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
44820 unsafefn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
44821 #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
44822 unsafefn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
44823
44824 #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
44825 unsafefn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
44826 #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
44827 unsafefn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
44828 #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
44829 unsafefn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
44830
44831 #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
44832 unsafefn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
44833 #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
44834 unsafefn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
44835 #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
44836 unsafefn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
44837
44838 #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
44839 unsafefn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
44840 #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
44841 unsafefn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
44842 #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
44843 unsafefn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
44844
44845 #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
44846 unsafefn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
44847 #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
44848 unsafefn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
44849 #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
44850 unsafefn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
44851
44852 #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
44853 unsafefn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
44854 #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
44855 unsafefn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
44856 #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
44857 unsafefn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
44858
44859 #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
44860 unsafefn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
44861 #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
44862 unsafefn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
44863 #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
44864 unsafefn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
44865
44866 #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
44867 unsafefn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
44868 #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
44869 unsafefn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
44870 #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
44871 unsafefn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
44872
44873 #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
44874 unsafefn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
44875 #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
44876 unsafefn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
44877 #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
44878 unsafefn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
44879
44880 #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
44881 unsafefn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
44882 #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
44883 unsafefn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
44884 #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
44885 unsafefn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
44886
44887 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
44888 unsafefn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
44889 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
44890 unsafefn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
44891 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
44892 unsafefn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
44893
44894 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
44895 unsafefn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
44896 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
44897 unsafefn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
44898 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
44899 unsafefn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
44900
44901 #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
44902 unsafefn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
44903 #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
44904 unsafefn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
44905 #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
44906 unsafefn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
44907
44908 #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
44909 unsafefn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
44910 #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
44911 unsafefn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
44912 #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
44913 unsafefn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
44914
44915 #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
44916 unsafefn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
44917 #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
44918 unsafefn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
44919 #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
44920 unsafefn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
44921
44922 #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
44923 unsafefn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
44924 #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
44925 unsafefn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
44926 #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
44927 unsafefn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
44928
44929 #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
44930 unsafefn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44931 #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
44932 unsafefn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44933 #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
44934 unsafefn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44935 #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
44936 unsafefn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44937 #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
44938 unsafefn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44939 #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
44940 unsafefn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44941 #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
44942 unsafefn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44943 #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
44944 unsafefn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44945 #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
44946 unsafefn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44947 #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
44948 unsafefn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44949 #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
44950 unsafefn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44951 #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
44952 unsafefn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44953 #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
44954 unsafefn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
44955 #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
44956 unsafefn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
44957 #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
44958 unsafefn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44959 #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
44960 unsafefn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44961 #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
44962 unsafefn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
44963 #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
44964 unsafefn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
44965
44966 #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
44967 unsafefn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44968 #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
44969 unsafefn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44970 #[link_name = "llvm.x86.avx512.rcp14.ss"]
44971 unsafefn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44972 #[link_name = "llvm.x86.avx512.rcp14.sd"]
44973 unsafefn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44974
44975 #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
44976 unsafefn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
44977 #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
44978 unsafefn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
44979 #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
44980 unsafefn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44981 #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
44982 unsafefn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44983
44984 #[link_name = "llvm.x86.avx512.vfmadd.f32"]
44985 unsafefn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
44986 #[link_name = "llvm.x86.avx512.vfmadd.f64"]
44987 unsafefn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
44988
44989 #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
44990 unsafefn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
44991 #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
44992 unsafefn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
44993 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
44994 unsafefn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
44995 #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
44996 unsafefn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
44997
44998 #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
44999 unsafefn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
45000 #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
45001 unsafefn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
45002
45003 #[link_name = "llvm.x86.avx512.vcvtss2si32"]
45004 unsafefn vcvtss2si(a: f32x4, rounding: i32) -> i32;
45005 #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
45006 unsafefn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
45007
45008 #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
45009 unsafefn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
45010 #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
45011 unsafefn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
45012
45013 #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
45014 unsafefn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
45015
45016 #[link_name = "llvm.x86.avx512.cvtusi2ss"]
45017 unsafefn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
45018
45019 #[link_name = "llvm.x86.avx512.cvttss2si"]
45020 unsafefn vcvttss2si(a: f32x4, rounding: i32) -> i32;
45021 #[link_name = "llvm.x86.avx512.cvttss2usi"]
45022 unsafefn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
45023
45024 #[link_name = "llvm.x86.avx512.cvttsd2si"]
45025 unsafefn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
45026 #[link_name = "llvm.x86.avx512.cvttsd2usi"]
45027 unsafefn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
45028
45029 #[link_name = "llvm.x86.avx512.vcomi.ss"]
45030 unsafefn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
45031 #[link_name = "llvm.x86.avx512.vcomi.sd"]
45032 unsafefn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
45033
45034 #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
45035 unsafefn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
45036 #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
45037 unsafefn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
45038 #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
45039 unsafefn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
45040 #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
45041 unsafefn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
45042 #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
45043 unsafefn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
45044 #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
45045 unsafefn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
45046 #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
45047 unsafefn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
45048 #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
45049 unsafefn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
45050 #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
45051 unsafefn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
45052 #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
45053 unsafefn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
45054 #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
45055 unsafefn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
45056 #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
45057 unsafefn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
45058
45059}
45060
45061#[cfg(test)]
45062mod tests {
45063 use crate::core_arch::assert_eq_const as assert_eq;
45064
45065 use stdarch_test::simd_test;
45066
45067 use crate::core_arch::x86::*;
45068 use crate::hint::black_box;
45069 use crate::mem::{self};
45070
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_abs_epi32() {
        // Lane values cover zero, +/-1, both i32 extremes, and ordinary
        // magnitudes; the 8-value pattern repeats to fill all 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_abs_epi32(a);
        // abs(i32::MIN) wraps back to i32::MIN, spelled here as
        // i32::MAX.wrapping_add(1) to make the wrapping explicit.
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m512i(r, e);
    }
45090
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_abs_epi32() {
        // Same lane pattern as the unmasked abs test: zero, +/-1, extremes.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_abs_epi32(a, 0, a);
        assert_eq_m512i(r, a);
        // Low eight mask bits set: lanes 0..8 get |a|, lanes 8..16 keep `src`.
        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
45112
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_abs_epi32() {
        // Same lane pattern as the unmasked abs test: zero, +/-1, extremes.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_abs_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight mask bits set: lanes 0..8 get |a|, lanes 8..16 are zeroed.
        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
45134
45135 #[simd_test(enable = "avx512f,avx512vl")]
45136 const fn test_mm256_mask_abs_epi32() {
45137 #[rustfmt::skip]
45138 let a = _mm256_setr_epi32(
45139 0, 1, -1, i32::MAX,
45140 i32::MIN, 100, -100, -32,
45141 );
45142 let r = _mm256_mask_abs_epi32(a, 0, a);
45143 assert_eq_m256i(r, a);
45144 let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
45145 #[rustfmt::skip]
45146 let e = _mm256_setr_epi32(
45147 0, 1, 1, i32::MAX,
45148 i32::MAX.wrapping_add(1), 100, -100, -32,
45149 );
45150 assert_eq_m256i(r, e);
45151 }
45152
45153 #[simd_test(enable = "avx512f,avx512vl")]
45154 const fn test_mm256_maskz_abs_epi32() {
45155 #[rustfmt::skip]
45156 let a = _mm256_setr_epi32(
45157 0, 1, -1, i32::MAX,
45158 i32::MIN, 100, -100, -32,
45159 );
45160 let r = _mm256_maskz_abs_epi32(0, a);
45161 assert_eq_m256i(r, _mm256_setzero_si256());
45162 let r = _mm256_maskz_abs_epi32(0b00001111, a);
45163 #[rustfmt::skip]
45164 let e = _mm256_setr_epi32(
45165 0, 1, 1, i32::MAX,
45166 0, 0, 0, 0,
45167 );
45168 assert_eq_m256i(r, e);
45169 }
45170
45171 #[simd_test(enable = "avx512f,avx512vl")]
45172 const fn test_mm_mask_abs_epi32() {
45173 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45174 let r = _mm_mask_abs_epi32(a, 0, a);
45175 assert_eq_m128i(r, a);
45176 let r = _mm_mask_abs_epi32(a, 0b00001111, a);
45177 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45178 assert_eq_m128i(r, e);
45179 }
45180
45181 #[simd_test(enable = "avx512f,avx512vl")]
45182 const fn test_mm_maskz_abs_epi32() {
45183 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45184 let r = _mm_maskz_abs_epi32(0, a);
45185 assert_eq_m128i(r, _mm_setzero_si128());
45186 let r = _mm_maskz_abs_epi32(0b00001111, a);
45187 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45188 assert_eq_m128i(r, e);
45189 }
45190
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_abs_ps() {
        // Zero, +/-1, the finite f32 extremes, and ordinary magnitudes,
        // repeated to fill all 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let r = _mm512_abs_ps(a);
        // f32::MIN is -f32::MAX, so its absolute value is exactly f32::MAX.
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
        );
        assert_eq_m512(r, e);
    }
45210
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_abs_ps() {
        // Same lane pattern as the unmasked abs_ps test.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_abs_ps(a, 0, a);
        assert_eq_m512(r, a);
        // Low eight mask bits set: lanes 0..8 get |a|, lanes 8..16 keep `src`.
        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
45232
45233 #[simd_test(enable = "avx512f")]
45234 const fn test_mm512_mask_mov_epi32() {
45235 let src = _mm512_set1_epi32(1);
45236 let a = _mm512_set1_epi32(2);
45237 let r = _mm512_mask_mov_epi32(src, 0, a);
45238 assert_eq_m512i(r, src);
45239 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
45240 assert_eq_m512i(r, a);
45241 }
45242
45243 #[simd_test(enable = "avx512f")]
45244 const fn test_mm512_maskz_mov_epi32() {
45245 let a = _mm512_set1_epi32(2);
45246 let r = _mm512_maskz_mov_epi32(0, a);
45247 assert_eq_m512i(r, _mm512_setzero_si512());
45248 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
45249 assert_eq_m512i(r, a);
45250 }
45251
45252 #[simd_test(enable = "avx512f,avx512vl")]
45253 const fn test_mm256_mask_mov_epi32() {
45254 let src = _mm256_set1_epi32(1);
45255 let a = _mm256_set1_epi32(2);
45256 let r = _mm256_mask_mov_epi32(src, 0, a);
45257 assert_eq_m256i(r, src);
45258 let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
45259 assert_eq_m256i(r, a);
45260 }
45261
45262 #[simd_test(enable = "avx512f,avx512vl")]
45263 const fn test_mm256_maskz_mov_epi32() {
45264 let a = _mm256_set1_epi32(2);
45265 let r = _mm256_maskz_mov_epi32(0, a);
45266 assert_eq_m256i(r, _mm256_setzero_si256());
45267 let r = _mm256_maskz_mov_epi32(0b11111111, a);
45268 assert_eq_m256i(r, a);
45269 }
45270
45271 #[simd_test(enable = "avx512f,avx512vl")]
45272 const fn test_mm_mask_mov_epi32() {
45273 let src = _mm_set1_epi32(1);
45274 let a = _mm_set1_epi32(2);
45275 let r = _mm_mask_mov_epi32(src, 0, a);
45276 assert_eq_m128i(r, src);
45277 let r = _mm_mask_mov_epi32(src, 0b00001111, a);
45278 assert_eq_m128i(r, a);
45279 }
45280
45281 #[simd_test(enable = "avx512f,avx512vl")]
45282 const fn test_mm_maskz_mov_epi32() {
45283 let a = _mm_set1_epi32(2);
45284 let r = _mm_maskz_mov_epi32(0, a);
45285 assert_eq_m128i(r, _mm_setzero_si128());
45286 let r = _mm_maskz_mov_epi32(0b00001111, a);
45287 assert_eq_m128i(r, a);
45288 }
45289
45290 #[simd_test(enable = "avx512f")]
45291 const fn test_mm512_mask_mov_ps() {
45292 let src = _mm512_set1_ps(1.);
45293 let a = _mm512_set1_ps(2.);
45294 let r = _mm512_mask_mov_ps(src, 0, a);
45295 assert_eq_m512(r, src);
45296 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
45297 assert_eq_m512(r, a);
45298 }
45299
45300 #[simd_test(enable = "avx512f")]
45301 const fn test_mm512_maskz_mov_ps() {
45302 let a = _mm512_set1_ps(2.);
45303 let r = _mm512_maskz_mov_ps(0, a);
45304 assert_eq_m512(r, _mm512_setzero_ps());
45305 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
45306 assert_eq_m512(r, a);
45307 }
45308
45309 #[simd_test(enable = "avx512f,avx512vl")]
45310 const fn test_mm256_mask_mov_ps() {
45311 let src = _mm256_set1_ps(1.);
45312 let a = _mm256_set1_ps(2.);
45313 let r = _mm256_mask_mov_ps(src, 0, a);
45314 assert_eq_m256(r, src);
45315 let r = _mm256_mask_mov_ps(src, 0b11111111, a);
45316 assert_eq_m256(r, a);
45317 }
45318
45319 #[simd_test(enable = "avx512f,avx512vl")]
45320 const fn test_mm256_maskz_mov_ps() {
45321 let a = _mm256_set1_ps(2.);
45322 let r = _mm256_maskz_mov_ps(0, a);
45323 assert_eq_m256(r, _mm256_setzero_ps());
45324 let r = _mm256_maskz_mov_ps(0b11111111, a);
45325 assert_eq_m256(r, a);
45326 }
45327
45328 #[simd_test(enable = "avx512f,avx512vl")]
45329 const fn test_mm_mask_mov_ps() {
45330 let src = _mm_set1_ps(1.);
45331 let a = _mm_set1_ps(2.);
45332 let r = _mm_mask_mov_ps(src, 0, a);
45333 assert_eq_m128(r, src);
45334 let r = _mm_mask_mov_ps(src, 0b00001111, a);
45335 assert_eq_m128(r, a);
45336 }
45337
45338 #[simd_test(enable = "avx512f,avx512vl")]
45339 const fn test_mm_maskz_mov_ps() {
45340 let a = _mm_set1_ps(2.);
45341 let r = _mm_maskz_mov_ps(0, a);
45342 assert_eq_m128(r, _mm_setzero_ps());
45343 let r = _mm_maskz_mov_ps(0b00001111, a);
45344 assert_eq_m128(r, a);
45345 }
45346
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_add_epi32() {
        // Includes both i32 extremes so lane-wise wrapping on overflow is
        // exercised alongside ordinary additions.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_add_epi32(a, b);
        // i32::MAX + 1 wraps to i32::MIN; all other lanes add normally.
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
        );
        assert_eq_m512i(r, e);
    }
45367
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_add_epi32() {
        // Same lane pattern as the unmasked add test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_add_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low eight mask bits set: lanes 0..8 hold a + 1, lanes 8..16 keep `src`.
        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
45390
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_add_epi32() {
        // Same lane pattern as the unmasked add test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_add_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight mask bits set: lanes 0..8 hold a + 1, lanes 8..16 are zeroed.
        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
45413
45414 #[simd_test(enable = "avx512f,avx512vl")]
45415 const fn test_mm256_mask_add_epi32() {
45416 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45417 let b = _mm256_set1_epi32(1);
45418 let r = _mm256_mask_add_epi32(a, 0, a, b);
45419 assert_eq_m256i(r, a);
45420 let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
45421 let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45422 assert_eq_m256i(r, e);
45423 }
45424
45425 #[simd_test(enable = "avx512f,avx512vl")]
45426 const fn test_mm256_maskz_add_epi32() {
45427 let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45428 let b = _mm256_set1_epi32(1);
45429 let r = _mm256_maskz_add_epi32(0, a, b);
45430 assert_eq_m256i(r, _mm256_setzero_si256());
45431 let r = _mm256_maskz_add_epi32(0b11111111, a, b);
45432 let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45433 assert_eq_m256i(r, e);
45434 }
45435
45436 #[simd_test(enable = "avx512f,avx512vl")]
45437 const fn test_mm_mask_add_epi32() {
45438 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45439 let b = _mm_set1_epi32(1);
45440 let r = _mm_mask_add_epi32(a, 0, a, b);
45441 assert_eq_m128i(r, a);
45442 let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
45443 let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
45444 assert_eq_m128i(r, e);
45445 }
45446
45447 #[simd_test(enable = "avx512f,avx512vl")]
45448 const fn test_mm_maskz_add_epi32() {
45449 let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
45450 let b = _mm_set1_epi32(1);
45451 let r = _mm_maskz_add_epi32(0, a, b);
45452 assert_eq_m128i(r, _mm_setzero_si128());
45453 let r = _mm_maskz_add_epi32(0b00001111, a, b);
45454 let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
45455 assert_eq_m128i(r, e);
45456 }
45457
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_add_ps() {
        // Zero, +/-1, the finite f32 extremes, and ordinary magnitudes.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_add_ps(a, b);
        // Adding 1.0 to f32::MAX rounds back to f32::MAX (1.0 is far below
        // its ULP); likewise `f32::MIN + 1.` evaluates to f32::MIN.
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
        );
        assert_eq_m512(r, e);
    }
45478
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_add_ps() {
        // Same lane pattern as the unmasked add_ps test.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_add_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        // Low eight mask bits set: lanes 0..8 hold a + 1.0, lanes 8..16 keep `src`.
        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
45501
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_add_ps() {
        // Same lane pattern as the unmasked add_ps test.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_add_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        // Low eight mask bits set: lanes 0..8 hold a + 1.0, lanes 8..16 are zeroed.
        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
45524
45525 #[simd_test(enable = "avx512f,avx512vl")]
45526 const fn test_mm256_mask_add_ps() {
45527 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45528 let b = _mm256_set1_ps(1.);
45529 let r = _mm256_mask_add_ps(a, 0, a, b);
45530 assert_eq_m256(r, a);
45531 let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
45532 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
45533 assert_eq_m256(r, e);
45534 }
45535
45536 #[simd_test(enable = "avx512f,avx512vl")]
45537 const fn test_mm256_maskz_add_ps() {
45538 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45539 let b = _mm256_set1_ps(1.);
45540 let r = _mm256_maskz_add_ps(0, a, b);
45541 assert_eq_m256(r, _mm256_setzero_ps());
45542 let r = _mm256_maskz_add_ps(0b11111111, a, b);
45543 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
45544 assert_eq_m256(r, e);
45545 }
45546
45547 #[simd_test(enable = "avx512f,avx512vl")]
45548 const fn test_mm_mask_add_ps() {
45549 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45550 let b = _mm_set1_ps(1.);
45551 let r = _mm_mask_add_ps(a, 0, a, b);
45552 assert_eq_m128(r, a);
45553 let r = _mm_mask_add_ps(a, 0b00001111, a, b);
45554 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
45555 assert_eq_m128(r, e);
45556 }
45557
45558 #[simd_test(enable = "avx512f,avx512vl")]
45559 const fn test_mm_maskz_add_ps() {
45560 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45561 let b = _mm_set1_ps(1.);
45562 let r = _mm_maskz_add_ps(0, a, b);
45563 assert_eq_m128(r, _mm_setzero_ps());
45564 let r = _mm_maskz_add_ps(0b00001111, a, b);
45565 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
45566 assert_eq_m128(r, e);
45567 }
45568
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_epi32() {
        // Includes both i32 extremes so lane-wise wrapping on underflow is
        // exercised alongside ordinary subtractions.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_sub_epi32(a, b);
        // i32::MIN - 1 wraps to i32::MAX; all other lanes subtract normally.
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
        );
        assert_eq_m512i(r, e);
    }
45589
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_epi32() {
        // Same lane pattern as the unmasked sub test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_sub_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low eight mask bits set: lanes 0..8 hold a - 1, lanes 8..16 keep `src`.
        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
45612
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_epi32() {
        // Same lane pattern as the unmasked sub test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_sub_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight mask bits set: lanes 0..8 hold a - 1, lanes 8..16 are zeroed.
        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
45635
45636 #[simd_test(enable = "avx512f,avx512vl")]
45637 const fn test_mm256_mask_sub_epi32() {
45638 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45639 let b = _mm256_set1_epi32(1);
45640 let r = _mm256_mask_sub_epi32(a, 0, a, b);
45641 assert_eq_m256i(r, a);
45642 let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
45643 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
45644 assert_eq_m256i(r, e);
45645 }
45646
45647 #[simd_test(enable = "avx512f,avx512vl")]
45648 const fn test_mm256_maskz_sub_epi32() {
45649 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45650 let b = _mm256_set1_epi32(1);
45651 let r = _mm256_maskz_sub_epi32(0, a, b);
45652 assert_eq_m256i(r, _mm256_setzero_si256());
45653 let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
45654 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
45655 assert_eq_m256i(r, e);
45656 }
45657
45658 #[simd_test(enable = "avx512f,avx512vl")]
45659 const fn test_mm_mask_sub_epi32() {
45660 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45661 let b = _mm_set1_epi32(1);
45662 let r = _mm_mask_sub_epi32(a, 0, a, b);
45663 assert_eq_m128i(r, a);
45664 let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
45665 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
45666 assert_eq_m128i(r, e);
45667 }
45668
45669 #[simd_test(enable = "avx512f,avx512vl")]
45670 const fn test_mm_maskz_sub_epi32() {
45671 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45672 let b = _mm_set1_epi32(1);
45673 let r = _mm_maskz_sub_epi32(0, a, b);
45674 assert_eq_m128i(r, _mm_setzero_si128());
45675 let r = _mm_maskz_sub_epi32(0b00001111, a, b);
45676 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
45677 assert_eq_m128i(r, e);
45678 }
45679
    // Unmasked 512-bit float subtraction: every lane holds a - b
    // (f32::MIN - 1. stays f32::MIN since 1 is below its ULP).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 512-bit float subtraction: mask 0 returns `src` (here `a`),
    // mask with the low 8 bits set subtracts in the low 8 lanes only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked 512-bit float subtraction: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_sub_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sub_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_mask_sub_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sub_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_maskz_sub_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sub_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_mask_sub_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sub_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_maskz_sub_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_sub_ps(0b00001111, a, b);
        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
        assert_eq_m128(r, e);
    }
45790
    // Unmasked 512-bit low-32-bit multiply: only the low 32 bits of each
    // product are kept, so i32::MAX * 2 wraps to -2 and i32::MIN * 2 to 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mullo_epi32(a, b);
        let e = _mm512_setr_epi32(
            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
        );
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set only the low 8 lanes hold products.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 2, -2, -2,
            0, 200, -200, -64,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_maskz_mullo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_mullo_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_mullo_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_maskz_mullo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_mullo_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(2);
        let r = _mm_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, -2, -2, 0);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_mullo_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(2);
        let r = _mm_maskz_mullo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(2, -2, -2, 0);
        assert_eq_m128i(r, e);
    }
45891
    // Unmasked 512-bit float multiply: f32::MAX * 2 overflows to +inf and
    // f32::MIN * 2 to -inf.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mul_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200.,
            -64.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); low 8 mask bits
    // select products for the low 8 lanes only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mask_mul_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_maskz_mul_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_mul_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_mask_mul_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_mul_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_maskz_mul_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_mul_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(2.);
        let r = _mm_mask_mul_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_mul_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(2.);
        let r = _mm_maskz_mul_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_mul_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
        assert_eq_m128(r, e);
    }
46011
    // Unmasked 512-bit float division: nonzero / 0 yields a signed infinity.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_div_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0.5, -0.5, 500.,
            f32::NEG_INFINITY, 50., -50., -16.,
        );
        assert_eq_m512(r, e); // 0/0 = NAN
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); low 8 mask bits
    // select quotients for the low 8 lanes only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_mask_div_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 1., -1., 1000.,
            -131., 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_maskz_div_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_div_ps() {
        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
        let r = _mm256_mask_div_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_div_ps() {
        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
        let r = _mm256_maskz_div_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_div_ps(0b11111111, a, b);
        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_div_ps() {
        let a = _mm_set_ps(100., 100., -100., -32.);
        let b = _mm_set_ps(2., 0., 2., 2.);
        let r = _mm_mask_div_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_div_ps() {
        let a = _mm_set_ps(100., 100., -100., -32.);
        let b = _mm_set_ps(2., 0., 2., 2.);
        let r = _mm_maskz_div_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_div_ps(0b00001111, a, b);
        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
        assert_eq_m128(r, e);
    }
46116
    // Unmasked 512-bit signed max: each lane keeps the larger operand.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the high lanes copy `a`, which here equals max(a, b).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_max_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_max_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_max_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_max_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_max_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_max_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }
46191
    // Unmasked 512-bit float max (not `const fn`, unlike the integer tests).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_ps(a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the high lanes copy `a`, which here equals max(a, b).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_max_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_mask_max_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_max_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_maskz_max_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_max_ps(0b11111111, a, b);
        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_max_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_mask_max_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(3., 2., 2., 3.);
        assert_eq_m128(r, e);
    }
46273
46274 #[simd_test(enable = "avx512f,avx512vl")]
46275 fn test_mm_maskz_max_ps() {
46276 let a = _mm_set_ps(0., 1., 2., 3.);
46277 let b = _mm_set_ps(3., 2., 1., 0.);
46278 let r = _mm_maskz_max_ps(0, a, b);
46279 assert_eq_m128(r, _mm_setzero_ps());
46280 let r = _mm_mask_max_ps(a, 0b00001111, a, b);
46281 let e = _mm_set_ps(3., 2., 2., 3.);
46282 assert_eq_m128(r, e);
46283 }
46284
    // Unmasked 512-bit unsigned max; all inputs here are non-negative, so the
    // expected lanes match the signed-max test above.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the high lanes copy `a`, which here equals max(a, b).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_max_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_max_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_max_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_max_epu32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_max_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_max_epu32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu32(0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }
46359
    // Unmasked 512-bit signed min: each lane keeps the smaller operand.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the result equals `a` since min(a, b) == a in those lanes
    // and the high lanes copy `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_min_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_min_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_min_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_min_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_min_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_min_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }
46434
    // Unmasked 512-bit float min (not `const fn`, unlike the integer tests).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_min_ps(a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the result equals `a` since min(a, b) == a in those lanes
    // and the high lanes copy `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_min_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_min_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_min_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_mask_min_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_min_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_maskz_min_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_min_ps(0b11111111, a, b);
        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_min_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_mask_min_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(0., 1., 1., 0.);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_min_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_maskz_min_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_min_ps(0b00001111, a, b);
        let e = _mm_set_ps(0., 1., 1., 0.);
        assert_eq_m128(r, e);
    }
46527
    // Unmasked 512-bit unsigned min; all inputs here are non-negative, so the
    // expected lanes match the signed-min test above.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the result equals `a` since min(a, b) == a in those lanes
    // and the high lanes copy `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit unsigned min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_min_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit unsigned min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_min_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
46580
46581 #[simd_test(enable = "avx512f,avx512vl")]
46582 const fn test_mm_mask_min_epu32() {
46583 let a = _mm_set_epi32(0, 1, 2, 3);
46584 let b = _mm_set_epi32(3, 2, 1, 0);
46585 let r = _mm_mask_min_epu32(a, 0, a, b);
46586 assert_eq_m128i(r, a);
46587 let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
46588 let e = _mm_set_epi32(0, 1, 1, 0);
46589 assert_eq_m128i(r, e);
46590 }
46591
46592 #[simd_test(enable = "avx512f,avx512vl")]
46593 const fn test_mm_maskz_min_epu32() {
46594 let a = _mm_set_epi32(0, 1, 2, 3);
46595 let b = _mm_set_epi32(3, 2, 1, 0);
46596 let r = _mm_maskz_min_epu32(0, a, b);
46597 assert_eq_m128i(r, _mm_setzero_si128());
46598 let r = _mm_maskz_min_epu32(0b00001111, a, b);
46599 let e = _mm_set_epi32(0, 1, 1, 0);
46600 assert_eq_m128i(r, e);
46601 }
46602
    // Square-root tests. Inputs are perfect squares so every expected lane is
    // exactly representable and the comparison needs no tolerance. `mask`
    // variants keep `src` lanes where the mask bit is clear; `maskz` variants
    // zero them.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_sqrt_ps(a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_mask_sqrt_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_maskz_sqrt_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sqrt_ps() {
        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
        let r = _mm256_mask_sqrt_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sqrt_ps() {
        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
        let r = _mm256_maskz_sqrt_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sqrt_ps() {
        let a = _mm_set_ps(0., 1., 4., 9.);
        let r = _mm_mask_sqrt_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sqrt_ps() {
        let a = _mm_set_ps(0., 1., 4., 9.);
        let r = _mm_maskz_sqrt_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_sqrt_ps(0b00001111, a);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
46682
46683 #[simd_test(enable = "avx512f")]
46684 const fn test_mm512_fmadd_ps() {
46685 let a = _mm512_set1_ps(1.);
46686 let b = _mm512_setr_ps(
46687 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46688 );
46689 let c = _mm512_set1_ps(1.);
46690 let r = _mm512_fmadd_ps(a, b, c);
46691 let e = _mm512_setr_ps(
46692 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
46693 );
46694 assert_eq_m512(r, e);
46695 }
46696
    // fmadd (a * b + c) writemask tests: `mask` variants keep `src` lanes,
    // `maskz` variants zero them, and `mask3` variants keep the lane from `c`
    // where the corresponding mask bit is clear.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // c == 2 so the unselected upper lanes stay at 2 in the expected vector.
        let c = _mm512_set1_ps(2.);
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
46816
    // fmsub (a * b - c) tests. With a == c == 1 each computed lane is
    // b[i] - 1. Writemask semantics as elsewhere: mask keeps src, maskz
    // zeroes, mask3 keeps lanes from c.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }
46956
    // fmaddsub tests: a * b with c alternately subtracted (even lane indices)
    // and added (odd lane indices), visible in the alternating -1/+1 offsets
    // of the expected vectors. Writemask semantics as elsewhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fmaddsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }
47092
    // fmsubadd tests: the mirror of fmaddsub — a * b with c alternately added
    // (even lane indices) and subtracted (odd lane indices). Writemask
    // semantics as elsewhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmsubadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmsubadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }
47232
    // fnmadd tests: -(a * b) + c. With a == c == 1 each computed lane is
    // 1 - b[i]. Writemask semantics as elsewhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fnmadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }
47368
47369 #[simd_test(enable = "avx512f")]
47370 const fn test_mm512_fnmsub_ps() {
47371 let a = _mm512_set1_ps(1.);
47372 let b = _mm512_setr_ps(
47373 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47374 );
47375 let c = _mm512_set1_ps(1.);
47376 let r = _mm512_fnmsub_ps(a, b, c);
47377 let e = _mm512_setr_ps(
47378 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
47379 );
47380 assert_eq_m512(r, e);
47381 }
47382
47383 #[simd_test(enable = "avx512f")]
47384 const fn test_mm512_mask_fnmsub_ps() {
47385 let a = _mm512_set1_ps(1.);
47386 let b = _mm512_setr_ps(
47387 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47388 );
47389 let c = _mm512_set1_ps(1.);
47390 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
47391 assert_eq_m512(r, a);
47392 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
47393 let e = _mm512_setr_ps(
47394 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
47395 );
47396 assert_eq_m512(r, e);
47397 }
47398
47399 #[simd_test(enable = "avx512f")]
47400 const fn test_mm512_maskz_fnmsub_ps() {
47401 let a = _mm512_set1_ps(1.);
47402 let b = _mm512_setr_ps(
47403 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47404 );
47405 let c = _mm512_set1_ps(1.);
47406 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
47407 assert_eq_m512(r, _mm512_setzero_ps());
47408 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
47409 let e = _mm512_setr_ps(
47410 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
47411 );
47412 assert_eq_m512(r, e);
47413 }
47414
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fnmsub_ps() {
        // mask3 variant: unselected lanes keep `c`. `c` differs between the
        // low (1.) and high (2.) halves so the copy-from-c path is observable.
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }
47432
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fnmsub_ps() {
        // _mm256_set_ps lists elements from lane 7 down to lane 0; mask 0
        // keeps `a`, the all-ones mask yields -(a * b) - c = -b - 1 everywhere.
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }
47444
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fnmsub_ps() {
        // Zeromask variant: mask 0 zeroes every lane; the all-ones mask
        // computes -(a * b) - c = -b - 1 in every lane.
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }
47456
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fnmsub_ps() {
        // mask3 variant: mask 0 keeps `c`; the all-ones mask computes
        // -(a * b) - c = -b - 1 in every lane.
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }
47468
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fnmsub_ps() {
        // 128-bit writemask variant: mask 0 keeps `a`; all-ones computes
        // -(a * b) - c = -b - 1 in all four lanes.
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }
47480
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fnmsub_ps() {
        // 128-bit zeromask variant: mask 0 zeroes every lane; all-ones
        // computes -(a * b) - c = -b - 1 in all four lanes.
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }
47492
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fnmsub_ps() {
        // 128-bit mask3 variant: mask 0 keeps `c`; all-ones computes
        // -(a * b) - c = -b - 1 in all four lanes.
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }
47504
47505 #[simd_test(enable = "avx512f")]
47506 fn test_mm512_rcp14_ps() {
47507 let a = _mm512_set1_ps(3.);
47508 let r = _mm512_rcp14_ps(a);
47509 let e = _mm512_set1_ps(0.33333206);
47510 assert_eq_m512(r, e);
47511 }
47512
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_rcp14_ps() {
        // 0.33333206 is vrcp14ps' approximation of 1/3. The high-8 writemask
        // updates lanes 8..=15 (setr order); lanes 0..=7 keep `src` (== 3.0).
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rcp14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }
47525
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_rcp14_ps() {
        // Zeromask variant: the high-8 mask computes rcp14 in lanes 8..=15
        // (setr order) and zeroes lanes 0..=7.
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rcp14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }
47538
47539 #[simd_test(enable = "avx512f,avx512vl")]
47540 fn test_mm256_rcp14_ps() {
47541 let a = _mm256_set1_ps(3.);
47542 let r = _mm256_rcp14_ps(a);
47543 let e = _mm256_set1_ps(0.33333206);
47544 assert_eq_m256(r, e);
47545 }
47546
47547 #[simd_test(enable = "avx512f,avx512vl")]
47548 fn test_mm256_mask_rcp14_ps() {
47549 let a = _mm256_set1_ps(3.);
47550 let r = _mm256_mask_rcp14_ps(a, 0, a);
47551 assert_eq_m256(r, a);
47552 let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
47553 let e = _mm256_set1_ps(0.33333206);
47554 assert_eq_m256(r, e);
47555 }
47556
47557 #[simd_test(enable = "avx512f,avx512vl")]
47558 fn test_mm256_maskz_rcp14_ps() {
47559 let a = _mm256_set1_ps(3.);
47560 let r = _mm256_maskz_rcp14_ps(0, a);
47561 assert_eq_m256(r, _mm256_setzero_ps());
47562 let r = _mm256_maskz_rcp14_ps(0b11111111, a);
47563 let e = _mm256_set1_ps(0.33333206);
47564 assert_eq_m256(r, e);
47565 }
47566
47567 #[simd_test(enable = "avx512f,avx512vl")]
47568 fn test_mm_rcp14_ps() {
47569 let a = _mm_set1_ps(3.);
47570 let r = _mm_rcp14_ps(a);
47571 let e = _mm_set1_ps(0.33333206);
47572 assert_eq_m128(r, e);
47573 }
47574
47575 #[simd_test(enable = "avx512f,avx512vl")]
47576 fn test_mm_mask_rcp14_ps() {
47577 let a = _mm_set1_ps(3.);
47578 let r = _mm_mask_rcp14_ps(a, 0, a);
47579 assert_eq_m128(r, a);
47580 let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
47581 let e = _mm_set1_ps(0.33333206);
47582 assert_eq_m128(r, e);
47583 }
47584
47585 #[simd_test(enable = "avx512f,avx512vl")]
47586 fn test_mm_maskz_rcp14_ps() {
47587 let a = _mm_set1_ps(3.);
47588 let r = _mm_maskz_rcp14_ps(0, a);
47589 assert_eq_m128(r, _mm_setzero_ps());
47590 let r = _mm_maskz_rcp14_ps(0b00001111, a);
47591 let e = _mm_set1_ps(0.33333206);
47592 assert_eq_m128(r, e);
47593 }
47594
47595 #[simd_test(enable = "avx512f")]
47596 fn test_mm512_rsqrt14_ps() {
47597 let a = _mm512_set1_ps(3.);
47598 let r = _mm512_rsqrt14_ps(a);
47599 let e = _mm512_set1_ps(0.5773392);
47600 assert_eq_m512(r, e);
47601 }
47602
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_rsqrt14_ps() {
        // 0.5773392 approximates 1/sqrt(3). The high-8 writemask updates lanes
        // 8..=15 (setr order); lanes 0..=7 keep `src` (== 3.0).
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }
47615
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_rsqrt14_ps() {
        // Zeromask variant: lanes 8..=15 (setr order) get ~1/sqrt(3);
        // the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rsqrt14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }
47628
47629 #[simd_test(enable = "avx512f,avx512vl")]
47630 fn test_mm256_rsqrt14_ps() {
47631 let a = _mm256_set1_ps(3.);
47632 let r = _mm256_rsqrt14_ps(a);
47633 let e = _mm256_set1_ps(0.5773392);
47634 assert_eq_m256(r, e);
47635 }
47636
47637 #[simd_test(enable = "avx512f,avx512vl")]
47638 fn test_mm256_mask_rsqrt14_ps() {
47639 let a = _mm256_set1_ps(3.);
47640 let r = _mm256_mask_rsqrt14_ps(a, 0, a);
47641 assert_eq_m256(r, a);
47642 let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
47643 let e = _mm256_set1_ps(0.5773392);
47644 assert_eq_m256(r, e);
47645 }
47646
47647 #[simd_test(enable = "avx512f,avx512vl")]
47648 fn test_mm256_maskz_rsqrt14_ps() {
47649 let a = _mm256_set1_ps(3.);
47650 let r = _mm256_maskz_rsqrt14_ps(0, a);
47651 assert_eq_m256(r, _mm256_setzero_ps());
47652 let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
47653 let e = _mm256_set1_ps(0.5773392);
47654 assert_eq_m256(r, e);
47655 }
47656
47657 #[simd_test(enable = "avx512f,avx512vl")]
47658 fn test_mm_rsqrt14_ps() {
47659 let a = _mm_set1_ps(3.);
47660 let r = _mm_rsqrt14_ps(a);
47661 let e = _mm_set1_ps(0.5773392);
47662 assert_eq_m128(r, e);
47663 }
47664
47665 #[simd_test(enable = "avx512f,avx512vl")]
47666 fn test_mm_mask_rsqrt14_ps() {
47667 let a = _mm_set1_ps(3.);
47668 let r = _mm_mask_rsqrt14_ps(a, 0, a);
47669 assert_eq_m128(r, a);
47670 let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
47671 let e = _mm_set1_ps(0.5773392);
47672 assert_eq_m128(r, e);
47673 }
47674
47675 #[simd_test(enable = "avx512f,avx512vl")]
47676 fn test_mm_maskz_rsqrt14_ps() {
47677 let a = _mm_set1_ps(3.);
47678 let r = _mm_maskz_rsqrt14_ps(0, a);
47679 assert_eq_m128(r, _mm_setzero_ps());
47680 let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
47681 let e = _mm_set1_ps(0.5773392);
47682 assert_eq_m128(r, e);
47683 }
47684
47685 #[simd_test(enable = "avx512f")]
47686 fn test_mm512_getexp_ps() {
47687 let a = _mm512_set1_ps(3.);
47688 let r = _mm512_getexp_ps(a);
47689 let e = _mm512_set1_ps(1.);
47690 assert_eq_m512(r, e);
47691 }
47692
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getexp_ps() {
        // getexp(3.0) == 1.0 (3.0 == 1.5 * 2^1). The high-8 writemask updates
        // lanes 8..=15 (setr order); lanes 0..=7 keep `src` (== 3.0).
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47704
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getexp_ps() {
        // Zeromask variant: lanes 8..=15 (setr order) get getexp(3.0) == 1.0;
        // the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47716
47717 #[simd_test(enable = "avx512f,avx512vl")]
47718 fn test_mm256_getexp_ps() {
47719 let a = _mm256_set1_ps(3.);
47720 let r = _mm256_getexp_ps(a);
47721 let e = _mm256_set1_ps(1.);
47722 assert_eq_m256(r, e);
47723 }
47724
47725 #[simd_test(enable = "avx512f,avx512vl")]
47726 fn test_mm256_mask_getexp_ps() {
47727 let a = _mm256_set1_ps(3.);
47728 let r = _mm256_mask_getexp_ps(a, 0, a);
47729 assert_eq_m256(r, a);
47730 let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
47731 let e = _mm256_set1_ps(1.);
47732 assert_eq_m256(r, e);
47733 }
47734
47735 #[simd_test(enable = "avx512f,avx512vl")]
47736 fn test_mm256_maskz_getexp_ps() {
47737 let a = _mm256_set1_ps(3.);
47738 let r = _mm256_maskz_getexp_ps(0, a);
47739 assert_eq_m256(r, _mm256_setzero_ps());
47740 let r = _mm256_maskz_getexp_ps(0b11111111, a);
47741 let e = _mm256_set1_ps(1.);
47742 assert_eq_m256(r, e);
47743 }
47744
47745 #[simd_test(enable = "avx512f,avx512vl")]
47746 fn test_mm_getexp_ps() {
47747 let a = _mm_set1_ps(3.);
47748 let r = _mm_getexp_ps(a);
47749 let e = _mm_set1_ps(1.);
47750 assert_eq_m128(r, e);
47751 }
47752
47753 #[simd_test(enable = "avx512f,avx512vl")]
47754 fn test_mm_mask_getexp_ps() {
47755 let a = _mm_set1_ps(3.);
47756 let r = _mm_mask_getexp_ps(a, 0, a);
47757 assert_eq_m128(r, a);
47758 let r = _mm_mask_getexp_ps(a, 0b00001111, a);
47759 let e = _mm_set1_ps(1.);
47760 assert_eq_m128(r, e);
47761 }
47762
47763 #[simd_test(enable = "avx512f,avx512vl")]
47764 fn test_mm_maskz_getexp_ps() {
47765 let a = _mm_set1_ps(3.);
47766 let r = _mm_maskz_getexp_ps(0, a);
47767 assert_eq_m128(r, _mm_setzero_ps());
47768 let r = _mm_maskz_getexp_ps(0b00001111, a);
47769 let e = _mm_set1_ps(1.);
47770 assert_eq_m128(r, e);
47771 }
47772
47773 #[simd_test(enable = "avx512f")]
47774 fn test_mm512_roundscale_ps() {
47775 let a = _mm512_set1_ps(1.1);
47776 let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
47777 let e = _mm512_set1_ps(1.0);
47778 assert_eq_m512(r, e);
47779 }
47780
47781 #[simd_test(enable = "avx512f")]
47782 fn test_mm512_mask_roundscale_ps() {
47783 let a = _mm512_set1_ps(1.1);
47784 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47785 let e = _mm512_set1_ps(1.1);
47786 assert_eq_m512(r, e);
47787 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
47788 let e = _mm512_set1_ps(1.0);
47789 assert_eq_m512(r, e);
47790 }
47791
47792 #[simd_test(enable = "avx512f")]
47793 fn test_mm512_maskz_roundscale_ps() {
47794 let a = _mm512_set1_ps(1.1);
47795 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47796 assert_eq_m512(r, _mm512_setzero_ps());
47797 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
47798 let e = _mm512_set1_ps(1.0);
47799 assert_eq_m512(r, e);
47800 }
47801
47802 #[simd_test(enable = "avx512f,avx512vl")]
47803 fn test_mm256_roundscale_ps() {
47804 let a = _mm256_set1_ps(1.1);
47805 let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
47806 let e = _mm256_set1_ps(1.0);
47807 assert_eq_m256(r, e);
47808 }
47809
47810 #[simd_test(enable = "avx512f,avx512vl")]
47811 fn test_mm256_mask_roundscale_ps() {
47812 let a = _mm256_set1_ps(1.1);
47813 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47814 let e = _mm256_set1_ps(1.1);
47815 assert_eq_m256(r, e);
47816 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
47817 let e = _mm256_set1_ps(1.0);
47818 assert_eq_m256(r, e);
47819 }
47820
47821 #[simd_test(enable = "avx512f,avx512vl")]
47822 fn test_mm256_maskz_roundscale_ps() {
47823 let a = _mm256_set1_ps(1.1);
47824 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47825 assert_eq_m256(r, _mm256_setzero_ps());
47826 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
47827 let e = _mm256_set1_ps(1.0);
47828 assert_eq_m256(r, e);
47829 }
47830
47831 #[simd_test(enable = "avx512f,avx512vl")]
47832 fn test_mm_roundscale_ps() {
47833 let a = _mm_set1_ps(1.1);
47834 let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
47835 let e = _mm_set1_ps(1.0);
47836 assert_eq_m128(r, e);
47837 }
47838
47839 #[simd_test(enable = "avx512f,avx512vl")]
47840 fn test_mm_mask_roundscale_ps() {
47841 let a = _mm_set1_ps(1.1);
47842 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47843 let e = _mm_set1_ps(1.1);
47844 assert_eq_m128(r, e);
47845 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
47846 let e = _mm_set1_ps(1.0);
47847 assert_eq_m128(r, e);
47848 }
47849
47850 #[simd_test(enable = "avx512f,avx512vl")]
47851 fn test_mm_maskz_roundscale_ps() {
47852 let a = _mm_set1_ps(1.1);
47853 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47854 assert_eq_m128(r, _mm_setzero_ps());
47855 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
47856 let e = _mm_set1_ps(1.0);
47857 assert_eq_m128(r, e);
47858 }
47859
47860 #[simd_test(enable = "avx512f")]
47861 fn test_mm512_scalef_ps() {
47862 let a = _mm512_set1_ps(1.);
47863 let b = _mm512_set1_ps(3.);
47864 let r = _mm512_scalef_ps(a, b);
47865 let e = _mm512_set1_ps(8.);
47866 assert_eq_m512(r, e);
47867 }
47868
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_scalef_ps() {
        // scalef: a * 2^floor(b) == 1 * 2^3 == 8. NOTE: `e` uses _mm512_set_ps
        // (highest lane listed first), so the 8. values land in lanes 8..=15 —
        // exactly the lanes selected by the writemask; lanes 0..=7 keep `a`.
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_scalef_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47881
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_scalef_ps() {
        // Zeromask variant: lanes 8..=15 get 1 * 2^3 == 8 (set_ps lists lane 15
        // first); the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_scalef_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
47894
47895 #[simd_test(enable = "avx512f,avx512vl")]
47896 fn test_mm256_scalef_ps() {
47897 let a = _mm256_set1_ps(1.);
47898 let b = _mm256_set1_ps(3.);
47899 let r = _mm256_scalef_ps(a, b);
47900 let e = _mm256_set1_ps(8.);
47901 assert_eq_m256(r, e);
47902 }
47903
47904 #[simd_test(enable = "avx512f,avx512vl")]
47905 fn test_mm256_mask_scalef_ps() {
47906 let a = _mm256_set1_ps(1.);
47907 let b = _mm256_set1_ps(3.);
47908 let r = _mm256_mask_scalef_ps(a, 0, a, b);
47909 assert_eq_m256(r, a);
47910 let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
47911 let e = _mm256_set1_ps(8.);
47912 assert_eq_m256(r, e);
47913 }
47914
47915 #[simd_test(enable = "avx512f,avx512vl")]
47916 fn test_mm256_maskz_scalef_ps() {
47917 let a = _mm256_set1_ps(1.);
47918 let b = _mm256_set1_ps(3.);
47919 let r = _mm256_maskz_scalef_ps(0, a, b);
47920 assert_eq_m256(r, _mm256_setzero_ps());
47921 let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
47922 let e = _mm256_set1_ps(8.);
47923 assert_eq_m256(r, e);
47924 }
47925
47926 #[simd_test(enable = "avx512f,avx512vl")]
47927 fn test_mm_scalef_ps() {
47928 let a = _mm_set1_ps(1.);
47929 let b = _mm_set1_ps(3.);
47930 let r = _mm_scalef_ps(a, b);
47931 let e = _mm_set1_ps(8.);
47932 assert_eq_m128(r, e);
47933 }
47934
47935 #[simd_test(enable = "avx512f,avx512vl")]
47936 fn test_mm_mask_scalef_ps() {
47937 let a = _mm_set1_ps(1.);
47938 let b = _mm_set1_ps(3.);
47939 let r = _mm_mask_scalef_ps(a, 0, a, b);
47940 assert_eq_m128(r, a);
47941 let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
47942 let e = _mm_set1_ps(8.);
47943 assert_eq_m128(r, e);
47944 }
47945
47946 #[simd_test(enable = "avx512f,avx512vl")]
47947 fn test_mm_maskz_scalef_ps() {
47948 let a = _mm_set1_ps(1.);
47949 let b = _mm_set1_ps(3.);
47950 let r = _mm_maskz_scalef_ps(0, a, b);
47951 assert_eq_m128(r, _mm_setzero_ps());
47952 let r = _mm_maskz_scalef_ps(0b00001111, a, b);
47953 let e = _mm_set1_ps(8.);
47954 assert_eq_m128(r, e);
47955 }
47956
47957 #[simd_test(enable = "avx512f")]
47958 fn test_mm512_fixupimm_ps() {
47959 let a = _mm512_set1_ps(f32::NAN);
47960 let b = _mm512_set1_ps(f32::MAX);
47961 let c = _mm512_set1_epi32(i32::MAX);
47962 //let r = _mm512_fixupimm_ps(a, b, c, 5);
47963 let r = _mm512_fixupimm_ps::<5>(a, b, c);
47964 let e = _mm512_set1_ps(0.0);
47965 assert_eq_m512(r, e);
47966 }
47967
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fixupimm_ps() {
        // `a` holds NaN in lanes 8..=15 (set_ps lists lane 15 first) — the same
        // lanes the writemask selects, so exactly those are fixed up to 0.0
        // while the unselected 1.0 lanes pass through untouched.
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47985
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fixupimm_ps() {
        // Zeromask variant: the selected NaN lanes 8..=15 are fixed up to 0.0
        // and the unselected lanes are zeroed, so the whole result is zero.
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48003
48004 #[simd_test(enable = "avx512f,avx512vl")]
48005 fn test_mm256_fixupimm_ps() {
48006 let a = _mm256_set1_ps(f32::NAN);
48007 let b = _mm256_set1_ps(f32::MAX);
48008 let c = _mm256_set1_epi32(i32::MAX);
48009 let r = _mm256_fixupimm_ps::<5>(a, b, c);
48010 let e = _mm256_set1_ps(0.0);
48011 assert_eq_m256(r, e);
48012 }
48013
48014 #[simd_test(enable = "avx512f,avx512vl")]
48015 fn test_mm256_mask_fixupimm_ps() {
48016 let a = _mm256_set1_ps(f32::NAN);
48017 let b = _mm256_set1_ps(f32::MAX);
48018 let c = _mm256_set1_epi32(i32::MAX);
48019 let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
48020 let e = _mm256_set1_ps(0.0);
48021 assert_eq_m256(r, e);
48022 }
48023
48024 #[simd_test(enable = "avx512f,avx512vl")]
48025 fn test_mm256_maskz_fixupimm_ps() {
48026 let a = _mm256_set1_ps(f32::NAN);
48027 let b = _mm256_set1_ps(f32::MAX);
48028 let c = _mm256_set1_epi32(i32::MAX);
48029 let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
48030 let e = _mm256_set1_ps(0.0);
48031 assert_eq_m256(r, e);
48032 }
48033
48034 #[simd_test(enable = "avx512f,avx512vl")]
48035 fn test_mm_fixupimm_ps() {
48036 let a = _mm_set1_ps(f32::NAN);
48037 let b = _mm_set1_ps(f32::MAX);
48038 let c = _mm_set1_epi32(i32::MAX);
48039 let r = _mm_fixupimm_ps::<5>(a, b, c);
48040 let e = _mm_set1_ps(0.0);
48041 assert_eq_m128(r, e);
48042 }
48043
48044 #[simd_test(enable = "avx512f,avx512vl")]
48045 fn test_mm_mask_fixupimm_ps() {
48046 let a = _mm_set1_ps(f32::NAN);
48047 let b = _mm_set1_ps(f32::MAX);
48048 let c = _mm_set1_epi32(i32::MAX);
48049 let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
48050 let e = _mm_set1_ps(0.0);
48051 assert_eq_m128(r, e);
48052 }
48053
48054 #[simd_test(enable = "avx512f,avx512vl")]
48055 fn test_mm_maskz_fixupimm_ps() {
48056 let a = _mm_set1_ps(f32::NAN);
48057 let b = _mm_set1_ps(f32::MAX);
48058 let c = _mm_set1_epi32(i32::MAX);
48059 let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
48060 let e = _mm_set1_ps(0.0);
48061 assert_eq_m128(r, e);
48062 }
48063
    #[simd_test(enable = "avx512f")]
    fn test_mm512_ternarylogic_epi32() {
        // IMM8 is a 3-input truth table: for each bit position the output bit
        // is imm8[(a_bit << 2) | (b_bit << 1) | c_bit].
        let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m512i(r, a);

        // Bitwise xor.
        let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m512i(r, e);
        assert_eq_m512i(r, _mm512_xor_si512(_mm512_xor_si512(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m512i(r, e);
    }
48085
48086 #[simd_test(enable = "avx512f")]
48087 fn test_mm512_mask_ternarylogic_epi32() {
48088 let src = _mm512_set1_epi32(1 << 2);
48089 let a = _mm512_set1_epi32(1 << 1);
48090 let b = _mm512_set1_epi32(1 << 0);
48091 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
48092 assert_eq_m512i(r, src);
48093 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
48094 let e = _mm512_set1_epi32(0);
48095 assert_eq_m512i(r, e);
48096 }
48097
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_ternarylogic_epi32() {
        let a = _mm512_set1_epi32(1 << 2);
        let b = _mm512_set1_epi32(1 << 1);
        let c = _mm512_set1_epi32(1 << 0);
        // NOTE(review): imm 9 (truth-table entries 0b000 and 0b011) appears
        // deliberate — it yields a nonzero *unmasked* result for these inputs,
        // so the assertion genuinely checks that the zeromask zeroes it.
        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // imm 8 (only entry 0b011) matches no bit pattern in these inputs,
        // so the full-mask result is all zeros.
        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }
48109
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_ternarylogic_epi32() {
        // Local shim: there is no _mm256_set4_epi32 intrinsic, so repeat the
        // four values across both 128-bit halves to mirror the 512-bit test.
        let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);

        let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m256i(r, a);

        // Bitwise xor.
        let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m256i(r, e);
        assert_eq_m256i(r, _mm256_xor_si256(_mm256_xor_si256(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m256i(r, e);
    }
48133
48134 #[simd_test(enable = "avx512f,avx512vl")]
48135 fn test_mm256_mask_ternarylogic_epi32() {
48136 let src = _mm256_set1_epi32(1 << 2);
48137 let a = _mm256_set1_epi32(1 << 1);
48138 let b = _mm256_set1_epi32(1 << 0);
48139 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
48140 assert_eq_m256i(r, src);
48141 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
48142 let e = _mm256_set1_epi32(0);
48143 assert_eq_m256i(r, e);
48144 }
48145
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_ternarylogic_epi32() {
        let a = _mm256_set1_epi32(1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let c = _mm256_set1_epi32(1 << 0);
        // NOTE(review): imm 9 yields a nonzero unmasked result for these
        // inputs, so this call genuinely verifies the zeromask zeroes it.
        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // imm 8 matches no bit pattern in these inputs: result is all zeros.
        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }
48157
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_ternarylogic_epi32() {
        // IMM8 is a 3-input truth table: for each bit position the output bit
        // is imm8[(a_bit << 2) | (b_bit << 1) | c_bit].
        let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m128i(r, a);

        // Bitwise xor.
        let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m128i(r, e);
        assert_eq_m128i(r, _mm_xor_si128(_mm_xor_si128(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m128i(r, e);
    }
48179
48180 #[simd_test(enable = "avx512f,avx512vl")]
48181 fn test_mm_mask_ternarylogic_epi32() {
48182 let src = _mm_set1_epi32(1 << 2);
48183 let a = _mm_set1_epi32(1 << 1);
48184 let b = _mm_set1_epi32(1 << 0);
48185 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
48186 assert_eq_m128i(r, src);
48187 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
48188 let e = _mm_set1_epi32(0);
48189 assert_eq_m128i(r, e);
48190 }
48191
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_ternarylogic_epi32() {
        let a = _mm_set1_epi32(1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let c = _mm_set1_epi32(1 << 0);
        // NOTE(review): imm 9 yields a nonzero unmasked result for these
        // inputs, so this call genuinely verifies the zeromask zeroes it.
        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        // imm 8 matches no bit pattern in these inputs: result is all zeros.
        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }
48203
48204 #[simd_test(enable = "avx512f")]
48205 fn test_mm512_getmant_ps() {
48206 let a = _mm512_set1_ps(10.);
48207 let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48208 let e = _mm512_set1_ps(1.25);
48209 assert_eq_m512(r, e);
48210 }
48211
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getmant_ps() {
        // 10.0 == 1.25 * 2^3; NORM_1_2 normalizes the mantissa into [1, 2).
        // The high-8 writemask updates lanes 8..=15 (setr order) only.
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
            a,
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
48227
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getmant_ps() {
        // Zeromask variant: lanes 8..=15 (setr order) get the [1, 2) mantissa
        // of 10.0 (== 1.25); the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
48240
48241 #[simd_test(enable = "avx512f,avx512vl")]
48242 fn test_mm256_getmant_ps() {
48243 let a = _mm256_set1_ps(10.);
48244 let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48245 let e = _mm256_set1_ps(1.25);
48246 assert_eq_m256(r, e);
48247 }
48248
48249 #[simd_test(enable = "avx512f,avx512vl")]
48250 fn test_mm256_mask_getmant_ps() {
48251 let a = _mm256_set1_ps(10.);
48252 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
48253 assert_eq_m256(r, a);
48254 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
48255 let e = _mm256_set1_ps(1.25);
48256 assert_eq_m256(r, e);
48257 }
48258
48259 #[simd_test(enable = "avx512f,avx512vl")]
48260 fn test_mm256_maskz_getmant_ps() {
48261 let a = _mm256_set1_ps(10.);
48262 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
48263 assert_eq_m256(r, _mm256_setzero_ps());
48264 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
48265 let e = _mm256_set1_ps(1.25);
48266 assert_eq_m256(r, e);
48267 }
48268
48269 #[simd_test(enable = "avx512f,avx512vl")]
48270 fn test_mm_getmant_ps() {
48271 let a = _mm_set1_ps(10.);
48272 let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48273 let e = _mm_set1_ps(1.25);
48274 assert_eq_m128(r, e);
48275 }
48276
48277 #[simd_test(enable = "avx512f,avx512vl")]
48278 fn test_mm_mask_getmant_ps() {
48279 let a = _mm_set1_ps(10.);
48280 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
48281 assert_eq_m128(r, a);
48282 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
48283 let e = _mm_set1_ps(1.25);
48284 assert_eq_m128(r, e);
48285 }
48286
48287 #[simd_test(enable = "avx512f,avx512vl")]
48288 fn test_mm_maskz_getmant_ps() {
48289 let a = _mm_set1_ps(10.);
48290 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
48291 assert_eq_m128(r, _mm_setzero_ps());
48292 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
48293 let e = _mm_set1_ps(1.25);
48294 assert_eq_m128(r, e);
48295 }
48296
    #[simd_test(enable = "avx512f")]
    fn test_mm512_add_round_ps() {
        // Lane 15 (0.00000007 + -1.) is the only inexact sum, so it is the
        // only lane where the rounding mode is observable: round-to-nearest
        // yields -0.99999994, round-toward-zero yields -0.9999999.
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }
48318
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_add_round_ps() {
        // Mask 0 keeps `a` unchanged. The high-8 writemask adds -1 to lanes
        // 8..=15 (setr order); the inexact lane 15 rounds to -0.99999994
        // under round-to-nearest.
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48342
    // Zeromask variant: mask 0 yields all zeros; with the high half selected,
    // lanes 8..15 get the rounded sum and lanes 0..7 are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48365
    // Subtraction with explicit rounding: the inexact last lane (0.00000007 - 1)
    // distinguishes round-to-nearest (-0.99999994) from round-toward-zero (-0.9999999).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }
48387
    // Writemask sub: mask 0 passes `src` through; high-half mask blends the
    // rounded difference into lanes 8..15 only.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48413
    // Zeromask sub: mask 0 yields zeros; high-half mask computes lanes 8..15
    // and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r =
            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48437
    // Multiplication by 0.1 (not exactly representable) makes most lanes inexact, so
    // the round-to-nearest and round-toward-zero expectations differ in several ulps.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.15, 0.2, 0.35,
            0.4, 0.55, 0.6, 0.75,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.14999999, 0.2, 0.35,
            0.4, 0.54999995, 0.59999996, 0.75,
            0.8, 0.95, 1.0, 1.15,
            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
        );
        assert_eq_m512(r, e);
    }
48467
    // Writemask mul: mask 0 returns `src`; high-half mask writes products into
    // lanes 8..15 and keeps `src` in lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }
48497
    // Zeromask mul: mask 0 yields zeros; high-half mask computes products for
    // lanes 8..15 and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r =
            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }
48525
    // 1/3 is inexact in binary: nearest rounds up to 0.33333334,
    // truncation rounds down to 0.3333333 — one ulp apart.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(0.33333334);
        assert_eq_m512(r, e);
        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(0.3333333);
        assert_eq_m512(r, e);
    }
48537
    // Writemask div: mask 0 returns `src`; high-half mask puts 1/3 in lanes
    // 8..15 and keeps `src` (1.0) in lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }
48558
    // Zeromask div: mask 0 yields zeros; high-half mask computes 1/3 in lanes
    // 8..15 and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r =
            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }
48577
    // sqrt(3) is irrational: nearest gives 1.7320508, round-toward-+inf the
    // next-larger representable value 1.7320509.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_ps(1.7320508);
        assert_eq_m512(r, e);
        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_ps(1.7320509);
        assert_eq_m512(r, e);
    }
48588
    // Writemask sqrt: mask 0 returns `src`; high-half mask blends sqrt(3) into
    // lanes 8..15 and keeps `src` (3.0) in lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r =
            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }
48606
    // Zeromask sqrt: mask 0 yields zeros; high-half mask computes sqrt(3) in
    // lanes 8..15 and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r =
            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }
48623
    // Fused a*b+c with one rounding at the end: 0.00000007*1 - 1 is -0.99999994
    // under nearest, -0.9999999 (toward zero) under truncation.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.9999999);
        assert_eq_m512(r, e);
    }
48636
    // Writemask fmadd keyed off `a` (first operand is also `src`): mask 0 returns
    // `a`; low-half mask puts a*b+c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48661
    // Zeromask fmadd: mask 0 yields zeros; low-half mask computes a*b+c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        #[rustfmt::skip]
        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48687
    // mask3 variant: `c` (third operand) doubles as `src`, so unselected lanes
    // keep `c` (-1.0); mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -1., -1., -1., -1.,
            -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
48712
    // Fused a*b-c: with c = 1 this matches the fmadd test's result lane-for-lane
    // (-0.99999994 nearest, -0.9999999 truncated).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.9999999);
        assert_eq_m512(r, e);
    }
48725
    // Writemask fmsub (src = `a`): mask 0 returns `a`; low-half mask computes
    // a*b-c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48750
    // Zeromask fmsub: mask 0 yields zeros; low-half mask computes a*b-c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48775
    // mask3 fmsub: `c` is `src`, so unselected lanes keep `c` (1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
48800
    // fmaddsub alternates per lane: even lanes get a*b - c, odd lanes a*b + c,
    // hence the alternating 1.0000001 / -0.99999994 pattern.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_setr_ps(
            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
            -0.9999999, 1., -0.9999999, 1., -0.9999999,
        );
        assert_eq_m512(r, e);
    }
48823
    // Writemask fmaddsub (src = `a`): mask 0 returns `a`; low-half mask applies
    // the alternating add/sub to lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48848
    // Zeromask fmaddsub: mask 0 yields zeros; low-half mask applies the
    // alternating add/sub to lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48873
    // mask3 fmaddsub: `c` is `src`, so unselected lanes keep `c` (-1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            -1., -1., -1., -1.,
            -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
48898
    // fmsubadd is fmaddsub with the per-lane roles swapped: even lanes get
    // a*b + c, odd lanes a*b - c, so the alternating pattern is phase-shifted.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
        );
        assert_eq_m512(r, e);
        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_setr_ps(
            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
        );
        assert_eq_m512(r, e);
    }
48921
    // Writemask fmsubadd (src = `a`): mask 0 returns `a`; low-half mask applies
    // the alternating sub/add to lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48946
    // Zeromask fmsubadd: mask 0 yields zeros; low-half mask applies the
    // alternating sub/add to lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48971
    // mask3 fmsubadd: `c` is `src`, so unselected lanes keep `c` (-1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -1., -1., -1., -1.,
            -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
48996
    // Fused -(a*b)+c: 1 - 0.00000007 is 0.99999994 under nearest and
    // 0.9999999 under round-toward-zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.9999999);
        assert_eq_m512(r, e);
    }
49010
    // Writemask fnmadd (src = `a`): mask 0 returns `a`; low-half mask computes
    // -(a*b)+c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
49033
    // Zeromask fnmadd: mask 0 yields zeros; low-half mask computes -(a*b)+c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49055
    // mask3 fnmadd: `c` is `src`, so unselected lanes keep `c` (1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49077
    // Fused -(a*b)-c: with c = -1 this equals 1 - 0.00000007, matching the
    // fnmadd expectations (0.99999994 nearest, 0.9999999 truncated).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.9999999);
        assert_eq_m512(r, e);
    }
49091
    // Writemask fnmsub (src = `a`): mask 0 returns `a`; low-half mask computes
    // -(a*b)-c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
49114
    // Zeromask fnmsub: mask 0 yields zeros; low-half mask computes -(a*b)-c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49136
    // mask3 fnmsub: `c` is `src`, so unselected lanes keep `c` (-1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
49158
    // Lane-wise max of ascending vs descending 0..15: the result is symmetric,
    // peaking at 8/9 in the middle. Rounding mode is _MM_FROUND_CUR_DIRECTION
    // (max is exact, so the mode cannot affect the result).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49173
    // Writemask max: mask 0 returns `src` (= `a`); low-half mask takes max in
    // lanes 0..7 and copies `a` in lanes 8..15 (which already equal the max).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49190
    // Zeromask max: mask 0 yields zeros; low-half mask takes max in lanes 0..7
    // and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49207
    // Lane-wise min of ascending vs descending 0..15: symmetric result dipping
    // to 7/7 in the middle; min is exact so the rounding immediate is inert.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        assert_eq_m512(r, e);
    }
49222
    // Writemask min: mask 0 returns `src` (= `a`). With the low half selected,
    // lanes 0..7 take min(a,b) = a and lanes 8..15 copy `a`, so the whole
    // expected vector equals `a` (0..15).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49239
    // Zeromask min: mask 0 yields zeros; low-half mask takes min in lanes 0..7
    // and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49256
    // getexp extracts floor(log2(|x|)) as a float: 3.0 = 1.5 * 2^1, so the
    // biased-exponent result is 1.0 in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }
49264
    // Writemask getexp: mask 0 returns `src` (3.0); high-half mask writes the
    // exponent (1.0) into lanes 8..15 only.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49276
    // Zero mask yields all zeros; the high-half mask keeps the exponent
    // (1.0) in lanes 8-15 and zeroes lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49288
    // IMM8 = 0 means round to integer with no scaling: 1.1 rounds to 1.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
49296
    // Zero mask keeps `src` (still 1.1); the all-ones mask rounds every
    // lane, producing 1.0 throughout.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
        let e = _mm512_set1_ps(1.1);
        assert_eq_m512(r, e);
        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
            a,
            0b11111111_11111111,
            a,
        );
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
49311
    // Zero mask yields all zeros; the all-ones mask rounds every lane
    // (1.1 -> 1.0).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
49322
    // scalef computes a * 2^floor(b): 1.0 * 2^3 = 8.0 in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(8.);
        assert_eq_m512(r, e);
    }
49331
    // Zero mask keeps `src`; the high-half mask writes 1 * 2^3 = 8 into
    // lanes 8-15 only (note `e` uses `set_ps`, so lane 15 is listed first).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49351
    // Zero mask yields all zeros; the high-half mask keeps 8.0 in lanes
    // 8-15 and zeroes lanes 0-7 (`e` uses `set_ps`, lane 15 listed first).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49370
    // With imm8 = 5 and the all-ones token table in `c`, NaN inputs in `a`
    // are fixed up to 0.0 in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fixupimm_round_ps() {
        let a = _mm512_set1_ps(f32::NAN);
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm512_set1_ps(0.0);
        assert_eq_m512(r, e);
    }
49380
    // Only the high-half lanes (which hold NaN) are selected by the mask,
    // so they are fixed up to 0.0 while the low-half 1.0 lanes are kept.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fixupimm_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
            a,
            0b11111111_00000000,
            b,
            c,
        );
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49403
    // The high-half NaN lanes are fixed up to 0.0 and the unselected
    // low-half lanes are zeroed, so the whole result is zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fixupimm_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
            0b11111111_00000000,
            a,
            b,
            c,
        );
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49426
    // getmant normalizes the mantissa into [1, 2): 10.0 = 1.25 * 2^3, so
    // every lane yields 1.25 (sign taken from the source).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a);
        let e = _mm512_set1_ps(1.25);
        assert_eq_m512(r, e);
    }
49438
    // Zero mask keeps `src` (10.0); the high-half mask writes the mantissa
    // 1.25 into lanes 8-15 only.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
49458
    // Zero mask yields all zeros; the high-half mask keeps the mantissa
    // 1.25 in lanes 8-15 and zeroes lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
49478
    // float -> i32 conversion uses round-to-nearest-even: -3.5 -> -4,
    // 9.5 -> 10, 11.5 -> 12, etc.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epi32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }
49488
    // Zero mask keeps `src`; the low-half mask converts lanes 0-7 (round
    // to nearest even) and copies `src` zeros into lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49501
    // Zero mask yields all zeros; the low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49513
    // 256-bit masked variant: zero mask keeps `src`; full mask converts
    // all 8 lanes with round-to-nearest-even (9.5 -> 10, 15.5 -> 16).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        let r = _mm256_mask_cvtps_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49524
    // 256-bit zeroing variant: zero mask yields all zeros; full mask
    // converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_maskz_cvtps_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49534
    // 128-bit masked variant: zero mask keeps `src`; full mask converts
    // all 4 lanes (13.5 -> 14, 15.5 -> 16 via round-to-nearest-even).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        let r = _mm_mask_cvtps_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49545
    // 128-bit zeroing variant: zero mask yields all zeros; full mask
    // converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_maskz_cvtps_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49555
    // float -> u32 conversion: negative (out-of-range) inputs saturate to
    // 0xFFFFFFFF, which reads back as -1 through the i32 expectation.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epu32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }
49565
    // Zero mask keeps `src`; low-half mask converts lanes 0-7 (negatives
    // saturate to 0xFFFFFFFF, i.e. -1) and copies `src` into lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49578
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49590
    // 256-bit float -> u32 with round-to-nearest-even (9.5 -> 10, 15.5 -> 16).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_cvtps_epu32(a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49598
    // Zero mask keeps `src`; full mask converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        let r = _mm256_mask_cvtps_epu32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49609
    // Zero mask yields all zeros; full mask converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_maskz_cvtps_epu32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49619
    // 128-bit float -> u32 with round-to-nearest-even (13.5 -> 14, 15.5 -> 16).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_cvtps_epu32(a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49627
    // Zero mask keeps `src`; full mask converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        let r = _mm_mask_cvtps_epu32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49638
    // Zero mask yields all zeros; full mask converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_maskz_cvtps_epu32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49648
    // Sign-extend 16 i8 lanes to i32; all inputs are non-negative, so the
    // values carry over unchanged. `const fn`: also checked at compile time.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49656
    // Zero mask keeps `src` (-1); low-half mask sign-extends lanes 0-7
    // (values 8..15 in `set` order) and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49667
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49677
    // 256-bit variant converts the low 8 bytes of `a`; zero mask keeps
    // `src`, full mask writes all 8 sign-extended lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49688
    // Zero mask yields all zeros; full mask converts the low 8 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepi8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49698
    // 128-bit variant converts the low 4 bytes of `a` (values 12..15);
    // zero mask keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49709
    // Zero mask yields all zeros; full mask converts the low 4 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepi8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49719
    // Zero-extend 16 u8 lanes to i32; for these non-negative inputs the
    // result matches the signed variant.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49727
    // Zero mask keeps `src` (-1); low-half mask zero-extends lanes 0-7
    // and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49738
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49748
    // 256-bit variant converts the low 8 bytes; zero mask keeps `src`,
    // full mask writes all 8 zero-extended lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49759
    // Zero mask yields all zeros; full mask converts the low 8 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49769
    // 128-bit variant converts the low 4 bytes (values 12..15); zero mask
    // keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49780
    // Zero mask yields all zeros; full mask converts the low 4 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49790
    // Sign-extend 16 i16 lanes to i32; non-negative inputs pass through
    // unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49798
    // Zero mask keeps `src` (-1); low-half mask sign-extends lanes 0-7
    // and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49809
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49819
    // 256-bit variant: zero mask keeps `src`, full mask sign-extends all
    // 8 i16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }
49830
    // Zero mask yields all zeros; full mask converts all 8 i16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }
49840
    // 128-bit variant converts the low 4 i16 lanes (values 4..7); zero
    // mask keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
49851
    // Zero mask yields all zeros; full mask converts the low 4 i16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_cvtepi16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
49861
    // Zero-extend 16 u16 lanes to i32; non-negative inputs pass through
    // unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49869
    // Zero mask keeps `src` (-1); low-half mask zero-extends lanes 0-7
    // and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49880
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49890
    // 256-bit variant: zero mask keeps `src`, full mask zero-extends all
    // 8 u16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49901
    // Zero mask yields all zeros; full mask converts all 8 u16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49911
    // 128-bit variant converts the low 4 u16 lanes (values 12..15); zero
    // mask keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49922
    // Zero mask yields all zeros; full mask converts the low 4 u16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49932
    // i32 -> f32 conversion; all inputs are exactly representable.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49942
    // Zero mask keeps `src` (-1.0); low-half mask converts lanes 0-7
    // (values 8..15 in `set` order) and keeps -1.0 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49955
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49967
    // 256-bit variant: zero mask keeps `src`, full mask converts all 8
    // lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm256_set1_ps(-1.);
        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }
49978
    // Zero mask yields all zeros; full mask converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtepi32_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }
49988
    // 128-bit variant: zero mask keeps `src`, full mask converts all 4
    // lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let src = _mm_set1_ps(-1.);
        let r = _mm_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
49999
    // Zero mask yields all zeros; full mask converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_cvtepi32_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
50009
    // u32 -> f32 conversion; these small non-negative values match the
    // signed conversion's results.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
50019
    // Zero mask keeps `src` (-1.0); low-half mask converts lanes 0-7 and
    // keeps -1.0 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
50032
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
50044
    // Truncate 16 i32 lanes to i16 (512 -> 256 bit); these values fit, so
    // they carry over unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
50052
    // Zero mask keeps `src` (-1); low-half mask truncates lanes 0-7 and
    // keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
50063
    // Zero mask yields all zeros; low-half mask truncates lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
50073
    // Truncate 8 i32 lanes to i16 (256 -> 128 bit); values fit, so they
    // carry over unchanged.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50081
    // Zero mask keeps `src`; full mask truncates all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50092
    // Zero mask yields all zeros; full mask truncates all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50102
    // Truncate 4 i32 lanes into the low 64 bits of the result; the upper
    // four i16 lanes are zeroed.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50110
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi16() {
        // Writemask narrowing of 4 lanes; only the low 4 mask bits matter,
        // and the upper 4 result lanes are zeroed by the instruction itself.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50121
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi16() {
        // Zeromask narrowing of 4 lanes; upper result lanes are always zero.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50131
50132 #[simd_test(enable = "avx512f")]
50133 const fn test_mm512_cvtepi32_epi8() {
50134 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50135 let r = _mm512_cvtepi32_epi8(a);
50136 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50137 assert_eq_m128i(r, e);
50138 }
50139
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_epi8() {
        // Writemask byte narrowing: unselected lanes keep `src` (-1 here).
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
50150
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi8() {
        // Zeromask byte narrowing: unselected lanes are zeroed.
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
50160
50161 #[simd_test(enable = "avx512f,avx512vl")]
50162 fn test_mm256_cvtepi32_epi8() {
50163 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
50164 let r = _mm256_cvtepi32_epi8(a);
50165 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
50166 assert_eq_m128i(r, e);
50167 }
50168
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_epi8() {
        // Writemask byte narrowing of 8 lanes; upper 8 result bytes stay zero.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50179
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtepi32_epi8() {
        // Zeromask byte narrowing of 8 lanes.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50189
50190 #[simd_test(enable = "avx512f,avx512vl")]
50191 fn test_mm_cvtepi32_epi8() {
50192 let a = _mm_set_epi32(4, 5, 6, 7);
50193 let r = _mm_cvtepi32_epi8(a);
50194 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
50195 assert_eq_m128i(r, e);
50196 }
50197
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi8() {
        // Writemask byte narrowing of 4 lanes; only low 4 mask bits are used.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50208
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi8() {
        // Zeromask byte narrowing of 4 lanes.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50218
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi16() {
        // Signed-saturating narrowing: out-of-range i32::MIN/i32::MAX clamp
        // to i16::MIN/i16::MAX instead of wrapping.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50238
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi16() {
        // Writemask + signed saturation: unselected lanes keep `src` (-1).
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50261
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi16() {
        // Zeromask + signed saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50283
50284 #[simd_test(enable = "avx512f,avx512vl")]
50285 fn test_mm256_cvtsepi32_epi16() {
50286 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
50287 let r = _mm256_cvtsepi32_epi16(a);
50288 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
50289 assert_eq_m128i(r, e);
50290 }
50291
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi16() {
        // Writemask + signed saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50302
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi16() {
        // Zeromask + signed saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50312
50313 #[simd_test(enable = "avx512f,avx512vl")]
50314 fn test_mm_cvtsepi32_epi16() {
50315 let a = _mm_set_epi32(4, 5, 6, 7);
50316 let r = _mm_cvtsepi32_epi16(a);
50317 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
50318 assert_eq_m128i(r, e);
50319 }
50320
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_epi16() {
        // Writemask + signed saturation of 4 lanes (extra mask bits ignored).
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50331
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtsepi32_epi16() {
        // Zeromask + signed saturation of 4 lanes.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50341
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi8() {
        // Signed-saturating byte narrowing: i32::MIN/MAX clamp to i8::MIN/MAX.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50361
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi8() {
        // Writemask + signed byte saturation: unselected lanes keep `src`.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50384
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi8() {
        // Zeromask + signed byte saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50406
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtsepi32_epi8() {
        // Signed byte saturation of 8 in-range lanes; upper 8 bytes zeroed.
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50420
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi8() {
        // Writemask + signed byte saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50437
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi8() {
        // Zeromask + signed byte saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50453
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtsepi32_epi8() {
        // Signed byte saturation of 4 in-range lanes; top 12 bytes zeroed.
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50467
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_epi8() {
        // Writemask + signed byte saturation of 4 lanes.
        let a = _mm_set_epi32(13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50484
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtsepi32_epi8() {
        // Zeromask + signed byte saturation of 4 lanes.
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50500
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi16() {
        // Unsigned saturation: i32::MIN reinterpreted as u32 (0x8000_0000)
        // exceeds u16::MAX and clamps to 0xFFFF, which prints as -1 in i16.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50514
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi16() {
        // Writemask + unsigned saturation: unselected lanes keep `src` (-1);
        // i32::MIN saturates to 0xFFFF (also -1) in the selected lanes.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50531
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi16() {
        // Zeromask + unsigned saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50547
50548 #[simd_test(enable = "avx512f,avx512vl")]
50549 fn test_mm256_cvtusepi32_epi16() {
50550 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
50551 let r = _mm256_cvtusepi32_epi16(a);
50552 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
50553 assert_eq_m128i(r, e);
50554 }
50555
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi16() {
        // Writemask + unsigned saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50566
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi16() {
        // Zeromask + unsigned saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50576
50577 #[simd_test(enable = "avx512f,avx512vl")]
50578 fn test_mm_cvtusepi32_epi16() {
50579 let a = _mm_set_epi32(5, 6, 7, 8);
50580 let r = _mm_cvtusepi32_epi16(a);
50581 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
50582 assert_eq_m128i(r, e);
50583 }
50584
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi16() {
        // Writemask + unsigned saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50595
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi16() {
        // Zeromask + unsigned saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, 8);
        let r = _mm_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50605
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi8() {
        // Unsigned byte saturation: i32::MIN as u32 exceeds u8::MAX and
        // clamps to 0xFF, which prints as -1 in i8.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50619
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi8() {
        // Writemask + unsigned byte saturation: unselected lanes keep `src`.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50636
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi8() {
        // Zeromask + unsigned byte saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50652
50653 #[simd_test(enable = "avx512f,avx512vl")]
50654 fn test_mm256_cvtusepi32_epi8() {
50655 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
50656 let r = _mm256_cvtusepi32_epi8(a);
50657 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
50658 assert_eq_m128i(r, e);
50659 }
50660
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi8() {
        // Writemask + unsigned byte saturation; i32::MAX clamps to u8::MAX.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50671
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi8() {
        // Zeromask + unsigned byte saturation; i32::MAX clamps to u8::MAX.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50681
50682 #[simd_test(enable = "avx512f,avx512vl")]
50683 fn test_mm_cvtusepi32_epi8() {
50684 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
50685 let r = _mm_cvtusepi32_epi8(a);
50686 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
50687 assert_eq_m128i(r, e);
50688 }
50689
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi8() {
        // Writemask + unsigned byte saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50700
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi8() {
        // Zeromask + unsigned byte saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let r = _mm_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50710
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epi32() {
        // Explicit rounding control: TO_NEAREST_INT rounds halves to even
        // (-1.5 -> -2, 9.5 -> 10); TO_NEG_INF rounds toward negative infinity
        // (9.5 -> 9). NO_EXC suppresses floating-point exceptions (SAE).
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
50723
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epi32() {
        // Writemask variant: only the low 8 lanes are converted; the rest
        // are copied from `src` (all zeros here).
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50742
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epi32() {
        // Zeromask variant: unselected lanes are zeroed.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50759
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epu32() {
        // Unsigned conversion: negative inputs are out of range and produce
        // the all-ones pattern 0xFFFF_FFFF, shown here as -1.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
50772
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epu32() {
        // Writemask unsigned conversion; out-of-range negatives yield -1.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50791
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epu32() {
        // Zeromask unsigned conversion; unselected lanes become 0.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50808
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepi32_ps() {
        // i32 -> f32: every input here is exactly representable, so the
        // rounding mode has no visible effect.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }
50818
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepi32_ps() {
        // Writemask i32 -> f32: unselected lanes keep `src` (0.0).
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50837
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepi32_ps() {
        // Zeromask i32 -> f32: unselected lanes become 0.0.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50854
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepu32_ps() {
        // Bits are interpreted as u32: -2 is 4294967294, whose nearest f32 is
        // 2^32; the literal 4294967300. denotes that same f32 value.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            8., 10., 10., 12.,
            12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }
50868
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepu32_ps() {
        // Writemask u32 -> f32: unselected lanes keep `src` (0.0).
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50891
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepu32_ps() {
        // Zeromask u32 -> f32: unselected lanes become 0.0.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50912
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_ph() {
        // 1.0f32 -> f16 bit pattern 0x3C00; four f16s per 64-bit lane give
        // 0x3C00_3C00_3C00_3C00 = 4323521613979991040.
        let a = _mm512_set1_ps(1.);
        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
        let e = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        assert_eq_m256i(r, e);
    }
50925
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_ph() {
        // Writemask f32 -> f16: low 8 mask bits fill the two low 64-bit
        // lanes; the rest come from `src` (zero).
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
50936
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_ph() {
        // Zeromask f32 -> f16: unselected half-words are zeroed.
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
50946
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvt_roundps_ph() {
        // Writemask f32 -> f16 on 8 lanes packed into a 128-bit result.
        let a = _mm256_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
50957
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvt_roundps_ph() {
        // Zeromask f32 -> f16 on 8 lanes.
        let a = _mm256_set1_ps(1.);
        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
50967
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvt_roundps_ph() {
        // Writemask f32 -> f16 on 4 lanes; only the low 64 bits are used.
        let a = _mm_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
50978
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvt_roundps_ph() {
        // Zeromask f32 -> f16 on 4 lanes.
        let a = _mm_set1_ps(1.);
        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
50988
50989 #[simd_test(enable = "avx512f")]
50990 fn test_mm512_cvtps_ph() {
50991 let a = _mm512_set1_ps(1.);
50992 let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
50993 let e = _mm256_setr_epi64x(
50994 4323521613979991040,
50995 4323521613979991040,
50996 4323521613979991040,
50997 4323521613979991040,
50998 );
50999 assert_eq_m256i(r, e);
51000 }
51001
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_ph() {
        // Writemask f32 -> f16 (default rounding via const arg NO_EXC).
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
51012
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_ph() {
        // Zeromask f32 -> f16: unselected half-words are zeroed.
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
51022
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_ph() {
        let a = _mm256_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        // Zero mask: merge-masking copies `src`.
        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        // All 8 lanes converted: each i64 holds four f16 1.0 values (0x3C00).
        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
51033
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_ph() {
        let a = _mm256_set1_ps(1.);
        // Zero mask: result is all zeros.
        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask: eight f16 1.0 values (0x3C00) across the two i64 lanes.
        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
51043
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_ph() {
        let a = _mm_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        // Zero mask: merge-masking copies `src`.
        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        // Four source floats -> four f16 1.0 values in the low i64; high i64 stays zero.
        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
51054
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_ph() {
        let a = _mm_set1_ps(1.);
        // Zero mask: result is all zeros.
        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Four f16 1.0 values (0x3C00) in the low i64; high i64 zero.
        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
51064
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundph_ps() {
        // 0x3C003C003C003C00 per i64: sixteen packed f16 1.0 values in total.
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // Widening f16 -> f32 is exact: every lane becomes f32 1.0.
        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }
51077
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let src = _mm512_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m512(r, src);
        // Low 8 lanes converted to 1.0, upper 8 taken from src (0.0).
        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51095
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // Zero mask: zero-masking yields all zeros.
        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        // Low 8 lanes converted to 1.0, upper 8 zeroed by the mask.
        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51112
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtph_ps() {
        // Sixteen packed f16 1.0 values (0x3C003C003C003C00 per i64 lane).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // f16 -> f32 widening is exact: all sixteen lanes become 1.0.
        let r = _mm512_cvtph_ps(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }
51125
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let src = _mm512_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvtph_ps(src, 0, a);
        assert_eq_m512(r, src);
        // Low 8 lanes converted to 1.0, upper 8 copied from src (0.0).
        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51143
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // Zero mask: result is all zeros.
        let r = _mm512_maskz_cvtph_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        // Low 8 lanes converted to 1.0, upper 8 zeroed by the mask.
        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51160
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtph_ps() {
        // Eight packed f16 1.0 values (0x3C003C003C003C00 per i64).
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        let src = _mm256_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm256_mask_cvtph_ps(src, 0, a);
        assert_eq_m256(r, src);
        // Full mask: all eight lanes converted to 1.0.
        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }
51171
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtph_ps() {
        // Eight packed f16 1.0 values.
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        // Zero mask: all zeros out.
        let r = _mm256_maskz_cvtph_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        // Full mask: all eight lanes converted to 1.0.
        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }
51181
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtph_ps() {
        // Only the low 64 bits (four f16 1.0 values) are consumed for a 128-bit result.
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        let src = _mm_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm_mask_cvtph_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
        let e = _mm_setr_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
51192
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtph_ps() {
        // Only the low 64 bits (four f16 1.0 values) are consumed.
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        // Zero mask: result is all zeros.
        let r = _mm_maskz_cvtph_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_cvtph_ps(0b00001111, a);
        let e = _mm_setr_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
51202
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
        // Truncation rounds toward zero: -1.5 -> -1, 9.5 -> 9, etc.
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51212
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m512i(r, src);
        // Low 8 lanes truncated toward zero; upper 8 copied from src (0).
        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51225
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: zero-masking yields all zeros.
        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 8 lanes truncated toward zero; upper 8 zeroed.
        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51237
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
        // Unsigned truncation: negative inputs are out of range and produce
        // the all-ones sentinel (u32::MAX, shown here as -1 in i32 lanes).
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51247
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m512i(r, src);
        // Negative inputs saturate to u32::MAX (-1 as i32); upper 8 lanes from src.
        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51260
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: all zeros out.
        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Negative inputs saturate to u32::MAX (-1 as i32); upper 8 lanes zeroed.
        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51272
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvttps_epi32(a);
        // Truncation toward zero: -1.5 -> -1, 9.5 -> 9, etc.
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51282
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvttps_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        // Low 8 lanes truncated; upper 8 copied from src (0).
        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51295
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: all zeros out.
        let r = _mm512_maskz_cvttps_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 8 lanes truncated; upper 8 zeroed.
        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51307
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvttps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm256_mask_cvttps_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        // Full mask: each lane truncated toward zero (9.5 -> 9, etc.).
        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51318
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvttps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm256_maskz_cvttps_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: truncation toward zero in each lane.
        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51328
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvttps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm_mask_cvttps_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        // Full mask: truncation toward zero in each lane.
        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51339
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvttps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm_maskz_cvttps_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask: truncation toward zero in each lane.
        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51349
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvttps_epu32(a);
        // Unsigned truncation: negatives are out of range -> u32::MAX (-1 as i32).
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51359
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvttps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        // Negatives saturate to u32::MAX (-1 as i32); upper 8 lanes from src.
        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51372
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: all zeros out.
        let r = _mm512_maskz_cvttps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Negatives saturate to u32::MAX (-1 as i32); upper 8 lanes zeroed.
        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51384
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvttps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        // All inputs non-negative: plain truncation toward zero.
        let r = _mm256_cvttps_epu32(a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51392
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvttps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm256_mask_cvttps_epu32(src, 0, a);
        assert_eq_m256i(r, src);
        // Full mask: truncation toward zero in each lane.
        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51403
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvttps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm256_maskz_cvttps_epu32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: truncation toward zero in each lane.
        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51413
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvttps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        // All inputs non-negative: plain truncation toward zero.
        let r = _mm_cvttps_epu32(a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51421
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvttps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm_mask_cvttps_epu32(src, 0, a);
        assert_eq_m128i(r, src);
        // Full mask: truncation toward zero in each lane.
        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51432
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvttps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm_maskz_cvttps_epu32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask: truncation toward zero in each lane.
        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51442
    #[simd_test(enable = "avx512f")]
    fn test_mm512_i32gather_ps() {
        // arr[i] == i as f32, so a gather at index i must read back exactly i.
        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      120, 128, 136, 144, 152, 160, 168, 176);
        let r = unsafe { _mm512_i32gather_ps::<4>(index, arr.as_ptr()) };
        #[rustfmt::skip]
        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
                                         120., 128., 136., 144., 152., 160., 168., 176.));
    }
51455
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32gather_ps() {
        // arr[i] == i as f32; masked-off lanes must come from `src` (2.0).
        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        let src = _mm512_set1_ps(2.);
        // Alternating mask: only odd lanes (bit set) perform a gather.
        let mask = 0b10101010_10101010;
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      120, 128, 136, 144, 152, 160, 168, 176);
        // A multiplier of 4 is word-addressing
        let r = unsafe { _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr()) };
        #[rustfmt::skip]
        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
                                         2., 128., 2., 144., 2., 160., 2., 176.));
    }
51470
    #[simd_test(enable = "avx512f")]
    fn test_mm512_i32gather_epi32() {
        // arr[i] == i, so a gather at index i must read back exactly i.
        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      120, 128, 136, 144, 152, 160, 168, 176);
        let r = unsafe { _mm512_i32gather_epi32::<4>(index, arr.as_ptr()) };
        #[rustfmt::skip]
        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                             120, 128, 136, 144, 152, 160, 168, 176));
    }
51483
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32gather_epi32() {
        // arr[i] == i; masked-off lanes must come from `src` (2).
        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        let src = _mm512_set1_epi32(2);
        // Alternating mask: only odd lanes (bit set) perform a gather.
        let mask = 0b10101010_10101010;
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
        );
        // A multiplier of 4 is word-addressing
        let r = unsafe { _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr()) };
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
        );
    }
51499
    #[simd_test(enable = "avx512f")]
    fn test_mm512_i32scatter_ps() {
        let mut arr = [0f32; 256];
        // Indices stride by 16 elements so scatters never overlap.
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
        let src = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        // A multiplier of 4 is word-addressing
        unsafe {
            _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
        }
        // Lane i (value i+1) lands at arr[i * 16]; every other slot stays zero.
        let mut expected = [0f32; 256];
        for i in 0..16 {
            expected[i * 16] = (i + 1) as f32;
        }
        assert_eq!(&arr[..], &expected[..],);
    }
51519
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32scatter_ps() {
        let mut arr = [0f32; 256];
        // Alternating mask: only odd lanes (bit set) are stored.
        let mask = 0b10101010_10101010;
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
        let src = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        // A multiplier of 4 is word-addressing
        unsafe {
            _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
        }
        // Odd lane 2i+1 holds value 2(i+1) and scatters to arr[(2i+1) * 16].
        let mut expected = [0f32; 256];
        for i in 0..8 {
            expected[i * 32 + 16] = 2. * (i + 1) as f32;
        }
        assert_eq!(&arr[..], &expected[..],);
    }
51540
51541 #[simd_test(enable = "avx512f")]
51542 fn test_mm512_i32scatter_epi32() {
51543 let mut arr = [0i32; 256];
51544 #[rustfmt::skip]
51545
51546 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51547 128, 144, 160, 176, 192, 208, 224, 240);
51548 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51549 // A multiplier of 4 is word-addressing
51550 unsafe {
51551 _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
51552 }
51553 let mut expected = [0i32; 256];
51554 for i in 0..16 {
51555 expected[i * 16] = (i + 1) as i32;
51556 }
51557 assert_eq!(&arr[..], &expected[..],);
51558 }
51559
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32scatter_epi32() {
        let mut arr = [0i32; 256];
        // Alternating mask: only odd lanes (bit set) are stored.
        let mask = 0b10101010_10101010;
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A multiplier of 4 is word-addressing
        unsafe {
            _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
        }
        // Odd lane 2i+1 holds value 2(i+1) and scatters to arr[(2i+1) * 16].
        let mut expected = [0i32; 256];
        for i in 0..8 {
            expected[i * 32 + 16] = 2 * (i + 1) as i32;
        }
        assert_eq!(&arr[..], &expected[..],);
    }
51578
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmplt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmplt_ps_mask(a, b);
        // Only f32::MIN and -100. are < -1.; NaN compares false.
        // Note `_mm512_set_ps` lists lanes high-to-low, so bit 0 is the last argument.
        assert_eq!(m, 0b00000101_00000101);
    }
51588
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmplt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
        // Result is (cmplt result) AND mask: unmasked LT bits are 0b00000101 per half.
        assert_eq!(r, 0b00000100_00000100);
    }
51599
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpnlt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // NLT (not-less-than, unordered) is the exact bitwise complement of LT:
        // NaN lanes are false for LT and true for NLT.
        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
    }
51608
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpnlt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // This mask selects exactly the NLT-true lanes, so the result equals the mask.
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
    }
51618
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpnle_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // Tests b NLE a: true where -1. > a or a is NaN (unordered compares true for NLE).
        let m = _mm512_cmpnle_ps_mask(b, a);
        assert_eq!(m, 0b00001101_00001101);
    }
51628
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpnle_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        // Result is (b NLE a) AND mask.
        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }
51639
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmple_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // a <= -1. holds for -1., f32::MIN and -100.; NaN compares false (ordered LE).
        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
    }
51648
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmple_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01111010_01111010;
        // Result is (a LE b) AND mask: 0b00100101 & 0b01111010 = 0b00100000 per half.
        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
    }
51658
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpeq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let m = _mm512_cmpeq_ps_mask(b, a);
        // Equal lanes set; NaN == NaN is false (ordered EQ).
        assert_eq!(m, 0b11001101_11001101);
    }
51670
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpeq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let mask = 0b01111010_01111010;
        // Result is (b EQ a) AND mask: 0b11001101 & 0b01111010 = 0b01001000 per half.
        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
        assert_eq!(r, 0b01001000_01001000);
    }
51683
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpneq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let m = _mm512_cmpneq_ps_mask(b, a);
        // NEQ (unordered) is true for differing lanes AND for NaN lanes.
        assert_eq!(m, 0b00110010_00110010);
    }
51695
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpneq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let mask = 0b01111010_01111010;
        // All NEQ-true lanes happen to fall inside the mask, so result == NEQ result.
        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010)
    }
51708
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmp_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // Generic compare with explicit predicate: _CMP_LT_OQ (ordered, quiet LT).
        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51718
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmp_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        // Result is (a LT b) AND mask.
        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }
51729
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cmp_ps_mask() {
        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm256_set1_ps(-1.);
        // Only f32::MIN and -100. are < -1.
        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00000101);
    }
51737
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cmp_ps_mask() {
        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm256_set1_ps(-1.);
        let mask = 0b01100110;
        // Result is (a LT b) AND mask: 0b00000101 & 0b01100110 = 0b00000100.
        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00000100);
    }
51746
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cmp_ps_mask() {
        let a = _mm_set_ps(0., 1., -1., 13.);
        let b = _mm_set1_ps(1.);
        // 0. and -1. are < 1. (lanes 3 and 1 of _mm_set_ps, i.e. mask bits 3 and 1).
        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00001010);
    }
51754
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cmp_ps_mask() {
        let a = _mm_set_ps(0., 1., -1., 13.);
        let b = _mm_set1_ps(1.);
        // All-ones mask: masked compare degenerates to the plain compare.
        let mask = 0b11111111;
        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00001010);
    }
51763
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmp_round_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // Same expectation as the non-round variant; SAE argument does not change results.
        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51773
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmp_round_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        // Result is (a LT b) AND mask; SAE argument does not change results.
        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }
51784
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpord_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        // ORD is true only where BOTH operands are non-NaN.
        let m = _mm512_cmpord_ps_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51796
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpord_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        let mask = 0b11000011_11000011;
        // Result is (both-non-NaN) AND mask: 0b00000101 & 0b11000011 = 0b00000001 per half.
        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00000001_00000001);
    }
51809
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpunord_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        // UNORD is true where EITHER operand is NaN — the complement of cmpord.
        let m = _mm512_cmpunord_ps_mask(a, b);

        assert_eq!(m, 0b11111010_11111010);
    }
51822
51823 #[simd_test(enable = "avx512f")]
51824 fn test_mm512_mask_cmpunord_ps_mask() {
51825 #[rustfmt::skip]
51826 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
51827 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
51828 #[rustfmt::skip]
51829 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
51830 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
51831 let mask = 0b00001111_00001111;
51832 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
51833 assert_eq!(m, 0b000001010_00001010);
51834 }
51835
    // Scalar single-precision (`ss`) compare tests: only the lowest lane
    // participates, so the resulting mask is at most 1 bit wide and only
    // bit 0 of any writemask matters.

    #[simd_test(enable = "avx512f")]
    // 2. >= 1. in the low lane -> mask bit 0 set.
    fn test_mm_cmp_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Writemask 0b10 leaves bit 0 disabled (result 0); 0b1 enables it.
    fn test_mm_mask_cmp_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Same compare with an explicit (current-direction) rounding argument.
    fn test_mm_cmp_round_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Rounding + writemask variant; same masking behavior as the non-round test.
    fn test_mm_mask_cmp_round_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
        assert_eq!(m, 1);
    }
51871
    // Scalar double-precision (`sd`) compare tests: mirror of the `ss` tests
    // above — only the low lane is compared, producing a 1-bit mask.

    #[simd_test(enable = "avx512f")]
    // 2. >= 1. in the low lane -> mask bit 0 set.
    fn test_mm_cmp_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Writemask 0b10 leaves bit 0 disabled (result 0); 0b1 enables it.
    fn test_mm_mask_cmp_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Same compare with an explicit (current-direction) rounding argument.
    fn test_mm_cmp_round_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Rounding + writemask variant; same masking behavior as the non-round test.
    fn test_mm_mask_cmp_round_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
        assert_eq!(m, 1);
    }
51907
    // Unsigned less-than tests. Note: the first argument of `_mm512_set_epi32`
    // lands in the HIGHEST lane, and mask bit i corresponds to lane i, so the
    // expected bitmasks read right-to-left relative to the argument list.
    // With b = -1 (0xFFFFFFFF = u32::MAX unsigned), every lane is `<` except
    // those that are themselves u32::MAX (-1 and `u32::MAX as i32`).

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmplt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epu32_mask(a, b);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    // Same vectors, with a writemask ANDed into the compare result.
    const fn test_mm512_mask_cmplt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit variant: only the 0 lane (highest argument) is < 1 unsigned.
    const fn test_mm256_cmplt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmplt_epu32_mask(a, b);
        assert_eq!(r, 0b10000000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmplt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b10000000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit variant: only the 0 lane is < 1 unsigned.
    const fn test_mm_cmplt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmplt_epu32_mask(a, b);
        assert_eq!(r, 0b00001000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmplt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00001000);
    }
51962
    // Unsigned greater-than tests. The 512-bit cases reuse the `cmplt` vectors
    // with the operands swapped (b > a <=> a < b), so the expected masks match
    // the corresponding `cmplt_epu32` tests above.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpgt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpgt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit: every lane except the two highest (0 and 1) is > 1 unsigned.
    const fn test_mm256_cmpgt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmpgt_epu32_mask(a, b);
        assert_eq!(r, 0b00111111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpgt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00111111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit: lanes holding 2 and u32::MAX are > 1 unsigned.
    const fn test_mm_cmpgt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmpgt_epu32_mask(a, b);
        assert_eq!(r, 0b00000011);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpgt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00000011);
    }
52017
    // Unsigned less-than-or-equal tests.

    #[simd_test(enable = "avx512f")]
    // `<=` must be the exact complement of `>` for the same operands.
    const fn test_mm512_cmple_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmple_epu32_mask(a, b),
            !_mm512_cmpgt_epu32_mask(a, b)
        )
    }

    #[simd_test(enable = "avx512f")]
    // b is u32::MAX in every lane, so `a <= b` is all-ones and the masked
    // result equals the writemask itself.
    const fn test_mm512_mask_cmple_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(
            _mm512_mask_cmple_epu32_mask(mask, a, b),
            0b01111010_01111010
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit: only lanes holding 0 and 1 (the two highest) are <= 1 unsigned.
    const fn test_mm256_cmple_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmple_epu32_mask(a, b);
        assert_eq!(r, 0b11000000)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmple_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
        assert_eq!(r, 0b11000000)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit: lanes holding 0 and 1 are <= 1 unsigned.
    const fn test_mm_cmple_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmple_epu32_mask(a, b);
        assert_eq!(r, 0b00001100)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmple_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00001100)
    }
52076
    #[simd_test(enable = "avx512f")]
    // Unsigned `>=` must be the exact complement of `<` for the same operands.
    const fn test_mm512_cmpge_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmpge_epu32_mask(a, b),
            !_mm512_cmplt_epu32_mask(a, b)
        )
    }
52088
52089 #[simd_test(enable = "avx512f")]
52090 const fn test_mm512_mask_cmpge_epu32_mask() {
52091 #[rustfmt::skip]
52092 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52093 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52094 let b = _mm512_set1_epi32(-1);
52095 let mask = 0b01111010_01111010;
52096 assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b01100000_0110000);
52097 }
52098
    // 256-/128-bit unsigned `>=` tests against splat(1): every lane except
    // the one holding 0 (the highest `set` argument) compares >=.

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpge_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmpge_epu32_mask(a, b);
        assert_eq!(r, 0b01111111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpge_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
        assert_eq!(r, 0b01111111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpge_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmpge_epu32_mask(a, b);
        assert_eq!(r, 0b00000111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpge_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00000111)
    }
52132
    // Unsigned equality tests: `a` and `b` agree everywhere except the lanes
    // where (-1, u32::MAX) is paired against (13, 42).

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpeq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    // Same vectors with a writemask ANDed into the equality result.
    const fn test_mm512_mask_cmpeq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpeq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpeq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010;
        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpeq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let m = _mm_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b00001100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpeq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00001100);
    }
52191
    // Unsigned inequality tests.

    #[simd_test(enable = "avx512f")]
    // `!=` must be the exact complement of `==` for the same operands.
    const fn test_mm512_cmpneq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpneq_epu32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
    }

    #[simd_test(enable = "avx512f")]
    // `a` additionally swaps 100/-100 relative to `b`, so those lanes also
    // differ; the writemask then filters the combined result.
    const fn test_mm512_mask_cmpneq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit: only the (-1, u32::MAX) vs (13, 42) lanes differ.
    const fn test_mm256_cmpneq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
        let r = _mm256_cmpneq_epu32_mask(b, a);
        assert_eq!(r, 0b00110000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpneq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00110000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpneq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let r = _mm_cmpneq_epu32_mask(b, a);
        assert_eq!(r, 0b00000011);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpneq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00000011);
    }
52250
    // Generic unsigned compare with an explicit predicate: `_MM_CMPINT_LT`
    // must reproduce the dedicated `cmplt_epu32` results.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmp_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmp_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmp_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmp_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit: only the lane holding 0 is < 1 unsigned (-1 is u32::MAX).
    const fn test_mm_cmp_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
        let b = _mm_set1_epi32(1);
        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00001000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmp_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00001000);
    }
52305
    // Signed less-than tests against splat(-1): only i32::MIN and -100
    // compare below -1 in signed order.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmplt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epi32_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmplt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmplt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmplt_epi32_mask(a, b);
        assert_eq!(r, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmplt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmplt_epi32_mask() {
        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmplt_epi32_mask(a, b);
        assert_eq!(r, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmplt_epi32_mask() {
        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000101);
    }
52360
    // Signed greater-than tests. The 512-bit cases swap operands so that
    // `b > a` mirrors the `cmplt_epi32` expectations.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epi32_mask(b, a);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit, direct `a > b`: every lane above -1 in signed order is set.
    const fn test_mm256_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b11011010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11011010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b00001101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001101);
    }
52415
    #[simd_test(enable = "avx512f")]
    // Signed `<=` must be the exact complement of `>` for the same operands.
    const fn test_mm512_cmple_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmple_epi32_mask(a, b),
            !_mm512_cmpgt_epi32_mask(a, b)
        )
    }
52427
52428 #[simd_test(enable = "avx512f")]
52429 const fn test_mm512_mask_cmple_epi32_mask() {
52430 #[rustfmt::skip]
52431 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52432 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52433 let b = _mm512_set1_epi32(-1);
52434 let mask = 0b01111010_01111010;
52435 assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b01100000_0110000);
52436 }
52437
    // 256-/128-bit signed `<=` tests against splat(-1): only -1, i32::MIN and
    // -100 lanes compare <=.

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00100101)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00100101)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00000010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000010)
    }
52471
    // Signed greater-than-or-equal tests.

    #[simd_test(enable = "avx512f")]
    // Signed `>=` must be the exact complement of `<` for the same operands.
    const fn test_mm512_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmpge_epi32_mask(a, b),
            !_mm512_cmplt_epi32_mask(a, b)
        )
    }

    #[simd_test(enable = "avx512f")]
    // Every lane the writemask enables is >= -1 here, so the masked result
    // equals the writemask itself.
    const fn test_mm512_mask_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(
            _mm512_mask_cmpge_epi32_mask(mask, a, b),
            0b01111010_01111010
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b11111010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11111010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // All four lanes (0, 1, -1, -1) are >= -1 in signed order.
    const fn test_mm_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b00001111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001111)
    }
52530
    // Signed equality tests: `a` and `b` agree everywhere except the lanes
    // where (-1, 13) is paired against (13, 42).

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    // Same vectors with a writemask ANDed into the equality result.
    const fn test_mm512_mask_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010;
        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let m = _mm_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b00001100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00001100);
    }
52589
    // Signed inequality tests.

    #[simd_test(enable = "avx512f")]
    // `!=` must be the exact complement of `==` for the same operands.
    const fn test_mm512_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
    }

    #[simd_test(enable = "avx512f")]
    // `a` additionally swaps 100/-100 relative to `b`, so those lanes also
    // differ; the writemask then filters the combined result.
    const fn test_mm512_mask_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // Differing lanes: (-1, 13) vs (13, 42) and the swapped 100/-100 pair.
    const fn test_mm256_mask_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110011)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let r = _mm_cmpneq_epi32_mask(b, a);
        assert_eq!(r, 0b00000011)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000011)
    }
52648
    // Generic signed compare with an explicit predicate: `_MM_CMPINT_LT`
    // must reproduce the dedicated `cmplt_epi32` results.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b01100110;
        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit vs splat(1): lanes holding 0 and -1 are < 1 in signed order.
    const fn test_mm_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00001010);
    }
52703
    // `set1_epi8` must equal `set_epi8` with the same value in all 64 byte lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi8() {
        let r = _mm512_set1_epi8(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi8(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2,
            ),
        )
    }

    // `set1_epi16` must equal `set_epi16` with the same value in all 32 word lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi16() {
        let r = _mm512_set1_epi16(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi16(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2,
            ),
        )
    }

    // `set_epi32` takes lanes high-to-low, `setr_epi32` low-to-high; reversed
    // argument lists must therefore produce identical vectors.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi32() {
        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    // Mirror of the previous test: `setr` of reversed args equals `set`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr_epi32() {
        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    // Inverse direction of test_mm512_set_epi8: explicit 64-lane set equals set1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi8() {
        let r = _mm512_set_epi8(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi8(2));
    }

    // Inverse direction of test_mm512_set_epi16: explicit 32-lane set equals set1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi16() {
        let r = _mm512_set_epi16(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi16(2));
    }

    // Explicit 16-lane set of a constant equals `set1_epi32`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi32() {
        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, _mm512_set1_epi32(2));
    }

    // All-zero integer vector equals broadcast zero.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_si512() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
    }

    // `setzero_epi32` is the same all-zero vector as broadcast zero.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_epi32() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
    }

    // `set_ps` (high-to-low) of reversed args equals `setr_ps` (low-to-high).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_ps() {
        let r = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_set_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    // Mirror of the previous test for `setr_ps`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr_ps() {
        let r = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_setr_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    // Broadcast float: `set1_ps(2.)` fills all 16 lanes with 2.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_ps() {
        #[rustfmt::skip]
        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
                                     2., 2., 2., 2., 2., 2., 2., 2.);
        assert_eq_m512(expected, _mm512_set1_ps(2.));
    }

    // `set4_epi32(d, c, b, a)` repeats the 4-lane pattern across all four 128-bit lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
    }

    // Float variant of the repeated 4-lane pattern.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
    }

    // `setr4` takes the 4-lane pattern in low-to-high order, so the same vector
    // as `set4(4, 3, 2, 1)` is produced by `setr4(1, 2, 3, 4)`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
    }

    // Float variant of the low-to-high repeated 4-lane pattern.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
    }

    // All-zero float vector equals broadcast 0.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_ps() {
        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
    }

    // `_mm512_setzero` (alias returning __m512) also yields all-zero lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero() {
        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
    }
52853
    // Unaligned 512-bit double load: lanes come back in memory (setr) order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_pd() {
        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
        let p = a.as_ptr();
        // black_box keeps the pointer opaque so the load is not const-folded away.
        let r = unsafe { _mm512_loadu_pd(black_box(p)) };
        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
        assert_eq_m512d(r, e);
    }

    // Unaligned 512-bit double store round-trip into an undefined-initialized vector.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_pd() {
        let a = _mm512_set1_pd(9.);
        let mut r = _mm512_undefined_pd();
        unsafe {
            _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
        }
        assert_eq_m512d(r, a);
    }

    // Unaligned 512-bit float load: lanes come back in memory (setr) order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_ps() {
        let a = &[
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        ];
        let p = a.as_ptr();
        let r = unsafe { _mm512_loadu_ps(black_box(p)) };
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }

    // Unaligned 512-bit float store round-trip.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        unsafe {
            _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
        }
        assert_eq_m512(r, a);
    }
52895
    // Masked unaligned load: lanes whose mask bit is set come from memory,
    // the rest keep the corresponding lane of `src` (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_epi32(src, m, black_box(p)) };
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Zero-masked unaligned load: unselected lanes become 0 instead of `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_epi32(m, black_box(p)) };
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Aligned masked load: the source buffer must be 64-byte aligned, hence the
    // repr(align(64)) wrapper.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let src = _mm512_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_load_epi32(src, m, black_box(p)) };
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Aligned zero-masked load: unselected lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_load_epi32(m, black_box(p)) };
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Masked unaligned store: only lanes with set mask bits are written;
    // unselected slots keep their prior value (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_epi32() {
        let mut r = [42_i32; 16];
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(unsafe { _mm512_loadu_epi32(r.as_ptr()) }, e);
    }

    // Aligned masked store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16],
        }
        let mut r = Align { data: [42; 16] };
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(unsafe { _mm512_load_epi32(r.data.as_ptr()) }, e);
    }
52977
    // Masked unaligned 64-bit load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_loadu_epi64(src, m, black_box(p)) };
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Zero-masked unaligned 64-bit load: unselected lanes become 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_loadu_epi64(m, black_box(p)) };
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Aligned masked 64-bit load: buffer must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let src = _mm512_set1_epi64(42);
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_load_epi64(src, m, black_box(p)) };
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Aligned zero-masked 64-bit load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_load_epi64(m, black_box(p)) };
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Masked unaligned 64-bit store: unwritten slots keep 42.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_epi64() {
        let mut r = [42_i64; 8];
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        unsafe {
            _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(unsafe { _mm512_loadu_epi64(r.as_ptr()) }, e);
    }

    // Aligned masked 64-bit store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8],
        }
        let mut r = Align { data: [42; 8] };
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        let p = r.data.as_mut_ptr();
        unsafe {
            _mm512_mask_store_epi64(p, m, a);
        }
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(unsafe { _mm512_load_epi64(r.data.as_ptr()) }, e);
    }
53060
    // Masked unaligned float load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_ps() {
        let src = _mm512_set1_ps(42.0);
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_ps(src, m, black_box(p)) };
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked unaligned float load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_ps() {
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_ps(m, black_box(p)) };
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    // Aligned masked float load: buffer must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let src = _mm512_set1_ps(42.0);
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_load_ps(src, m, black_box(p)) };
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    // Aligned zero-masked float load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_load_ps(m, black_box(p)) };
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    // Masked unaligned float store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_ps() {
        let mut r = [42_f32; 16];
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(unsafe { _mm512_loadu_ps(r.as_ptr()) }, e);
    }

    // Aligned masked float store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16],
        }
        let mut r = Align { data: [42.0; 16] };
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(unsafe { _mm512_load_ps(r.data.as_ptr()) }, e);
    }
53174
    // Masked unaligned double load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_pd() {
        let src = _mm512_set1_pd(42.0);
        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_loadu_pd(src, m, black_box(p)) };
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Zero-masked unaligned double load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_loadu_pd(m, black_box(p)) };
        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Aligned masked double load: buffer must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8], // 64 bytes
        }
        let src = _mm512_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_load_pd(src, m, black_box(p)) };
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Aligned zero-masked double load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8], // 64 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_load_pd(m, black_box(p)) };
        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Masked unaligned double store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_pd() {
        let mut r = [42_f64; 8];
        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(unsafe { _mm512_loadu_pd(r.as_ptr()) }, e);
    }

    // Aligned masked double store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8],
        }
        let mut r = Align { data: [42.0; 8] };
        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(unsafe { _mm512_load_pd(r.data.as_ptr()) }, e);
    }
53256
    // 256-bit masked unaligned load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_loadu_epi32(src, m, black_box(p)) };
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked unaligned load: unselected lanes become 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_loadu_epi32(m, black_box(p)) };
        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned masked load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_epi32() {
        #[repr(align(32))]
        struct Align {
            data: [i32; 8], // 32 bytes
        }
        let src = _mm256_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_load_epi32(src, m, black_box(p)) };
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned zero-masked load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_epi32() {
        #[repr(align(32))]
        struct Align {
            data: [i32; 8], // 32 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_load_epi32(m, black_box(p)) };
        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit masked unaligned store: unwritten slots keep 42.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_epi32() {
        let mut r = [42_i32; 8];
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        unsafe {
            _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(unsafe { _mm256_loadu_epi32(r.as_ptr()) }, e);
    }
53322
53323 #[simd_test(enable = "avx512f,avx512vl")]
53324 const fn test_mm256_mask_store_epi32() {
53325 #[repr(align(64))]
53326 struct Align {
53327 data: [i32; 8],
53328 }
53329 let mut r = Align { data: [42; 8] };
53330 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53331 let m = 0b11001010;
53332 unsafe {
53333 _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
53334 }
53335 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53336 assert_eq_m256i(unsafe { _mm256_load_epi32(r.data.as_ptr()) }, e);
53337 }
53338
    // 256-bit masked unaligned 64-bit load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b1010;
        let r = unsafe { _mm256_mask_loadu_epi64(src, m, black_box(p)) };
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked unaligned 64-bit load: unselected lanes become 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_loadu_epi64(m, black_box(p)) };
        let e = _mm256_setr_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned masked 64-bit load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4], // 32 bytes
        }
        let src = _mm256_set1_epi64x(42);
        let a = Align {
            data: [1_i64, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_mask_load_epi64(src, m, black_box(p)) };
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned zero-masked 64-bit load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4], // 32 bytes
        }
        let a = Align {
            data: [1_i64, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_load_epi64(m, black_box(p)) };
        let e = _mm256_setr_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit masked unaligned 64-bit store: unwritten slots keep 42.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_epi64() {
        let mut r = [42_i64; 4];
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let m = 0b1010;
        unsafe {
            _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(unsafe { _mm256_loadu_epi64(r.as_ptr()) }, e);
    }

    // 256-bit aligned masked 64-bit store: destination must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_store_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4],
        }
        let mut r = Align { data: [42; 4] };
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let m = 0b1010;
        unsafe {
            _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(unsafe { _mm256_load_epi64(r.data.as_ptr()) }, e);
    }
53420
    // 256-bit masked unaligned float load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_ps() {
        let src = _mm256_set1_ps(42.0);
        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_loadu_ps(src, m, black_box(p)) };
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit zero-masked unaligned float load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_ps() {
        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_loadu_ps(m, black_box(p)) };
        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit aligned masked float load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8], // 32 bytes
        }
        let src = _mm256_set1_ps(42.0);
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_load_ps(src, m, black_box(p)) };
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit aligned zero-masked float load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8], // 32 bytes
        }
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_load_ps(m, black_box(p)) };
        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit masked unaligned float store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_ps() {
        let mut r = [42_f32; 8];
        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(unsafe { _mm256_loadu_ps(r.as_ptr()) }, e);
    }

    // 256-bit aligned masked float store: destination must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_store_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8],
        }
        let mut r = Align { data: [42.0; 8] };
        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(unsafe { _mm256_load_ps(r.data.as_ptr()) }, e);
    }
53502
    // 256-bit masked unaligned double load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_pd() {
        let src = _mm256_set1_pd(42.0);
        let a = &[1.0_f64, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b1010;
        let r = unsafe { _mm256_mask_loadu_pd(src, m, black_box(p)) };
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit zero-masked unaligned double load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_loadu_pd(m, black_box(p)) };
        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit aligned masked double load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4], // 32 bytes
        }
        let src = _mm256_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_mask_load_pd(src, m, black_box(p)) };
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit aligned zero-masked double load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4], // 32 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_load_pd(m, black_box(p)) };
        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit masked unaligned double store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_pd() {
        let mut r = [42_f64; 4];
        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        unsafe {
            _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(unsafe { _mm256_loadu_pd(r.as_ptr()) }, e);
    }

    // 256-bit aligned masked double store: destination must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_store_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4],
        }
        let mut r = Align { data: [42.0; 4] };
        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        unsafe {
            _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(unsafe { _mm256_load_pd(r.data.as_ptr()) }, e);
    }
53584
    // 128-bit masked unaligned load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_loadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b1010;
        let r = unsafe { _mm_mask_loadu_epi32(src, m, black_box(p)) };
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked unaligned load: unselected lanes become 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm_maskz_loadu_epi32(m, black_box(p)) };
        let e = _mm_setr_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }
53605
    // 128-bit aligned masked load: buffer must be 16-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_load_epi32() {
        #[repr(align(16))]
        struct Align {
            data: [i32; 4], // 16 bytes
        }
        let src = _mm_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm_mask_load_epi32(src, m, black_box(p)) };
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(r, e);
    }
53622
53623 #[simd_test(enable = "avx512f,avx512vl")]
53624 const fn test_mm_maskz_load_epi32() {
53625 #[repr(align(16))]
53626 struct Align {
53627 data: [i32; 4], // 16 bytes
53628 }
53629 let a = Align {
53630 data: [1_i32, 2, 3, 4],
53631 };
53632 let p = a.data.as_ptr();
53633 let m = 0b1010;
53634 let r = unsafe { _mm_maskz_load_epi32(m, black_box(p)) };
53635 let e = _mm_setr_epi32(0, 2, 0, 4);
53636 assert_eq_m128i(r, e);
53637 }
53638
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_epi32() {
    // Masked unaligned store: only lanes with a set mask bit overwrite memory;
    // the rest keep the initial 42 fill.
    let mut r = [42_i32; 4];
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let m = 0b1010;
    unsafe {
        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi32(42, 2, 42, 4);
    assert_eq_m128i(unsafe { _mm_loadu_epi32(r.as_ptr()) }, e);
}
53650
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_epi32() {
    // Masked aligned store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [i32; 4], // 16 bytes
    }
    let mut r = Align { data: [42; 4] };
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let m = 0b1010;
    unsafe {
        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi32(42, 2, 42, 4);
    assert_eq_m128i(unsafe { _mm_load_epi32(r.data.as_ptr()) }, e);
}
53666
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_loadu_epi64() {
    // Masked unaligned 64-bit load: mask 0b10 loads lane 1, lane 0 keeps `src`.
    let src = _mm_set1_epi64x(42);
    let a = &[1_i64, 2];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_loadu_epi64(src, m, black_box(p)) };
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(r, e);
}
53677
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_loadu_epi64() {
    // Zero-masked unaligned 64-bit load: lane 0 (mask bit clear) becomes 0.
    let a = &[1_i64, 2];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_loadu_epi64(m, black_box(p)) };
    let e = _mm_setr_epi64x(0, 2);
    assert_eq_m128i(r, e);
}
53687
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_load_epi64() {
    // Masked aligned 64-bit load from a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [i64; 2], // 16 bytes
    }
    let src = _mm_set1_epi64x(42);
    let a = Align { data: [1_i64, 2] };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_load_epi64(src, m, black_box(p)) };
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(r, e);
}
53702
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_load_epi64() {
    // Zero-masked aligned 64-bit load.
    #[repr(align(16))]
    struct Align {
        data: [i64; 2], // 16 bytes
    }
    let a = Align { data: [1_i64, 2] };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_load_epi64(m, black_box(p)) };
    let e = _mm_setr_epi64x(0, 2);
    assert_eq_m128i(r, e);
}
53716
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_epi64() {
    // Masked unaligned 64-bit store: only lane 1 (mask 0b10) is written.
    let mut r = [42_i64; 2];
    let a = _mm_setr_epi64x(1, 2);
    let m = 0b10;
    unsafe {
        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(unsafe { _mm_loadu_epi64(r.as_ptr()) }, e);
}
53728
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_epi64() {
    // Masked aligned 64-bit store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [i64; 2], // 16 bytes
    }
    let mut r = Align { data: [42; 2] };
    let a = _mm_setr_epi64x(1, 2);
    let m = 0b10;
    unsafe {
        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(unsafe { _mm_load_epi64(r.data.as_ptr()) }, e);
}
53744
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_loadu_ps() {
    // Masked unaligned f32 load: mask 0b1010 loads lanes 1 and 3; others keep `src`.
    let src = _mm_set1_ps(42.0);
    let a = &[1.0_f32, 2.0, 3.0, 4.0];
    let p = a.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_mask_loadu_ps(src, m, black_box(p)) };
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(r, e);
}
53755
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_loadu_ps() {
    // Zero-masked unaligned f32 load: clear mask bits yield 0.0 lanes.
    let a = &[1.0_f32, 2.0, 3.0, 4.0];
    let p = a.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_maskz_loadu_ps(m, black_box(p)) };
    let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
    assert_eq_m128(r, e);
}
53765
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_load_ps() {
    // Masked aligned f32 load from a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f32; 4], // 16 bytes
    }
    let src = _mm_set1_ps(42.0);
    let a = Align {
        data: [1.0_f32, 2.0, 3.0, 4.0],
    };
    let p = a.data.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_mask_load_ps(src, m, black_box(p)) };
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(r, e);
}
53782
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_load_ps() {
    // Zero-masked aligned f32 load.
    #[repr(align(16))]
    struct Align {
        data: [f32; 4], // 16 bytes
    }
    let a = Align {
        data: [1.0_f32, 2.0, 3.0, 4.0],
    };
    let p = a.data.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_maskz_load_ps(m, black_box(p)) };
    let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
    assert_eq_m128(r, e);
}
53798
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_ps() {
    // Masked unaligned f32 store: only lanes 1 and 3 overwrite the 42.0 fill.
    let mut r = [42_f32; 4];
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let m = 0b1010;
    unsafe {
        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(unsafe { _mm_loadu_ps(r.as_ptr()) }, e);
}
53810
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_ps() {
    // Masked aligned f32 store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f32; 4], // 16 bytes
    }
    let mut r = Align { data: [42.0; 4] };
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let m = 0b1010;
    unsafe {
        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(unsafe { _mm_load_ps(r.data.as_ptr()) }, e);
}
53826
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_loadu_pd() {
    // Masked unaligned f64 load: mask 0b10 loads lane 1; lane 0 keeps `src`.
    let src = _mm_set1_pd(42.0);
    let a = &[1.0_f64, 2.0];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_loadu_pd(src, m, black_box(p)) };
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(r, e);
}
53837
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_loadu_pd() {
    // Zero-masked unaligned f64 load: lane 0 (mask bit clear) becomes 0.0.
    let a = &[1.0_f64, 2.0];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_loadu_pd(m, black_box(p)) };
    let e = _mm_setr_pd(0.0, 2.0);
    assert_eq_m128d(r, e);
}
53847
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_load_pd() {
    // Masked aligned f64 load from a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f64; 2], // 16 bytes
    }
    let src = _mm_set1_pd(42.0);
    let a = Align {
        data: [1.0_f64, 2.0],
    };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_load_pd(src, m, black_box(p)) };
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(r, e);
}
53864
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_load_pd() {
    // Zero-masked aligned f64 load.
    #[repr(align(16))]
    struct Align {
        data: [f64; 2], // 16 bytes
    }
    let a = Align {
        data: [1.0_f64, 2.0],
    };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_load_pd(m, black_box(p)) };
    let e = _mm_setr_pd(0.0, 2.0);
    assert_eq_m128d(r, e);
}
53880
#[simd_test(enable = "avx512f")]
fn test_mm_mask_load_ss() {
    // Masked scalar f32 load: mask bit 0 set loads from memory, clear keeps `src`.
    #[repr(align(16))]
    struct Align {
        data: f32,
    }
    let src = _mm_set_ss(2.0);
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_mask_load_ss(src, 0b1, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(1.0));
    let r = unsafe { _mm_mask_load_ss(src, 0b0, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(2.0));
}
53894
#[simd_test(enable = "avx512f")]
fn test_mm_maskz_load_ss() {
    // Zero-masked scalar f32 load: a clear mask bit yields 0.0.
    #[repr(align(16))]
    struct Align {
        data: f32,
    }
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_maskz_load_ss(0b1, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(1.0));
    let r = unsafe { _mm_maskz_load_ss(0b0, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(0.0));
}
53907
#[simd_test(enable = "avx512f")]
fn test_mm_mask_load_sd() {
    // Masked scalar f64 load: mask bit 0 set loads from memory, clear keeps `src`.
    #[repr(align(16))]
    struct Align {
        data: f64,
    }
    let src = _mm_set_sd(2.0);
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_mask_load_sd(src, 0b1, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(1.0));
    let r = unsafe { _mm_mask_load_sd(src, 0b0, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(2.0));
}
53921
#[simd_test(enable = "avx512f")]
fn test_mm_maskz_load_sd() {
    // Zero-masked scalar f64 load: a clear mask bit yields 0.0.
    #[repr(align(16))]
    struct Align {
        data: f64,
    }
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_maskz_load_sd(0b1, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(1.0));
    let r = unsafe { _mm_maskz_load_sd(0b0, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(0.0));
}
53934
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_pd() {
    // Masked unaligned f64 store: only lane 1 (mask 0b10) is written.
    let mut r = [42_f64; 2];
    let a = _mm_setr_pd(1.0, 2.0);
    let m = 0b10;
    unsafe {
        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(unsafe { _mm_loadu_pd(r.as_ptr()) }, e);
}
53946
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_pd() {
    // Masked aligned f64 store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f64; 2], // 16 bytes
    }
    let mut r = Align { data: [42.0; 2] };
    let a = _mm_setr_pd(1.0, 2.0);
    let m = 0b10;
    unsafe {
        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(unsafe { _mm_load_pd(r.data.as_ptr()) }, e);
}
53962
#[simd_test(enable = "avx512f")]
fn test_mm_mask_store_ss() {
    // Masked scalar f32 store: writes only when mask bit 0 is set; the second
    // call (mask 0b0) must leave memory untouched.
    #[repr(align(16))]
    struct Align {
        data: f32,
    }
    let a = _mm_set_ss(2.0);
    let mut mem = Align { data: 1.0 };
    unsafe {
        _mm_mask_store_ss(&mut mem.data, 0b1, a);
    }
    assert_eq!(mem.data, 2.0);
    unsafe {
        _mm_mask_store_ss(&mut mem.data, 0b0, a);
    }
    assert_eq!(mem.data, 2.0);
}
53980
#[simd_test(enable = "avx512f")]
fn test_mm_mask_store_sd() {
    // Masked scalar f64 store: writes only when mask bit 0 is set; the second
    // call (mask 0b0) must leave memory untouched.
    #[repr(align(16))]
    struct Align {
        data: f64,
    }
    let a = _mm_set_sd(2.0);
    let mut mem = Align { data: 1.0 };
    unsafe {
        _mm_mask_store_sd(&mut mem.data, 0b1, a);
    }
    assert_eq!(mem.data, 2.0);
    unsafe {
        _mm_mask_store_sd(&mut mem.data, 0b0, a);
    }
    assert_eq!(mem.data, 2.0);
}
53998
#[simd_test(enable = "avx512f")]
const fn test_mm512_setr_pd() {
    // `set` and `setr` are element-order mirrors of each other.
    let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
54004
#[simd_test(enable = "avx512f")]
const fn test_mm512_set_pd() {
    // Mirror of test_mm512_setr_pd: `setr` reversed equals `set`.
    let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
54010
#[simd_test(enable = "avx512f")]
const fn test_mm512_rol_epi32() {
    // Rotate left by 1: bit 31 wraps around to bit 0.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_rol_epi32::<1>(a);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54018
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_rol_epi32() {
    // Mask 0 keeps the `src` operand unchanged; all-ones mask rotates every lane.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54028
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_rol_epi32() {
    // Zero mask yields all zeros; low-half mask rotates the 8 low lanes only.
    let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let r = _mm512_maskz_rol_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    assert_eq_m512i(r, e);
}
54038
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_rol_epi32() {
    // 256-bit rotate left by 1: bit 31 wraps to bit 0.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_rol_epi32::<1>(a);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54046
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_rol_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54056
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_rol_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_maskz_rol_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54066
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_rol_epi32() {
    // 128-bit rotate left by 1: bit 31 wraps to bit 0.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_rol_epi32::<1>(a);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54074
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_rol_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_mask_rol_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54084
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_rol_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_maskz_rol_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54094
#[simd_test(enable = "avx512f")]
const fn test_mm512_ror_epi32() {
    // Rotate right by 1: bit 0 wraps around to bit 31.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_ror_epi32::<1>(a);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54102
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_ror_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane right by 1.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54112
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_ror_epi32() {
    // Zero mask yields all zeros; low-half mask rotates only the 8 low lanes.
    let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    let r = _mm512_maskz_ror_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    assert_eq_m512i(r, e);
}
54122
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_ror_epi32() {
    // 256-bit rotate right by 1: bit 0 wraps to bit 31.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm256_ror_epi32::<1>(a);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54130
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_ror_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane right by 1.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54140
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_ror_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm256_maskz_ror_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54150
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_ror_epi32() {
    // 128-bit rotate right by 1: bit 0 wraps to bit 31.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let r = _mm_ror_epi32::<1>(a);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54158
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_ror_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes right by 1.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let r = _mm_mask_ror_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54168
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_ror_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let r = _mm_maskz_ror_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54178
#[simd_test(enable = "avx512f")]
const fn test_mm512_slli_epi32() {
    // Logical shift left by 1: unlike rotate, bit 31 is discarded (lane 0 -> 0).
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_slli_epi32::<1>(a);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54186
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_slli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane left by 1.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54196
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_slli_epi32() {
    // Zero mask yields all zeros; low-half mask shifts the 8 low lanes
    // (lane 0's 1 << 31 shifts out to 0).
    let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let r = _mm512_maskz_slli_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
    assert_eq_m512i(r, e);
}
54206
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_slli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane left by 1.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54216
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_slli_epi32() {
    // Zero mask yields all zeros; all-ones mask shifts every lane left by 1.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_maskz_slli_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54226
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_slli_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 shifts all four lanes left by 1.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_mask_slli_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54236
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_slli_epi32() {
    // Zero mask yields all zeros; mask 0b1111 shifts all four lanes left by 1.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_maskz_slli_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54246
#[simd_test(enable = "avx512f")]
const fn test_mm512_srli_epi32() {
    // Logical shift right by 1; 0 stays 0 (`0 << 31` is just a stylistic 0
    // paralleling the rotate tests' `1 << 31`).
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_srli_epi32::<1>(a);
    let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54254
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_srli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane right by 1.
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54264
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_srli_epi32() {
    // Zero mask yields all zeros; low-half mask shifts the 8 low lanes right by 1.
    let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
    let r = _mm512_maskz_srli_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
    assert_eq_m512i(r, e);
}
54274
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_srli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts lane 7's 1 << 5 down to 1 << 4.
    let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}
54284
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_srli_epi32() {
    // Zero mask yields all zeros; all-ones mask shifts every lane right by 1.
    let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm256_maskz_srli_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}
54294
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_srli_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 shifts all four lanes right by 1.
    let a = _mm_set_epi32(1 << 5, 0, 0, 0);
    let r = _mm_mask_srli_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(1 << 4, 0, 0, 0);
    assert_eq_m128i(r, e);
}
54304
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_srli_epi32() {
    // Zero mask yields all zeros; mask 0b1111 shifts all four lanes right by 1.
    let a = _mm_set_epi32(1 << 5, 0, 0, 0);
    let r = _mm_maskz_srli_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1 << 4, 0, 0, 0);
    assert_eq_m128i(r, e);
}
54314
#[simd_test(enable = "avx512f")]
const fn test_mm512_rolv_epi32() {
    // Variable rotate left: per-lane count from `b` (all 1 here); bit 31 wraps to bit 0.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_rolv_epi32(a, b);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54323
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_rolv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane by its count in `b`.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_mask_rolv_epi32(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54334
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_rolv_epi32() {
    // Zero mask yields all zeros; low-half mask rotates only the 8 low lanes
    // (the high lane's 1 << 31 is masked off to 0).
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_maskz_rolv_epi32(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    assert_eq_m512i(r, e);
}
54345
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_rolv_epi32() {
    // 256-bit variable rotate left by per-lane counts (all 1 here).
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_rolv_epi32(a, b);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54354
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_rolv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_mask_rolv_epi32(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54365
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_rolv_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_maskz_rolv_epi32(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54376
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_rolv_epi32() {
    // 128-bit variable rotate left by per-lane counts (all 1 here).
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let b = _mm_set1_epi32(1);
    let r = _mm_rolv_epi32(a, b);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54385
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_rolv_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let b = _mm_set1_epi32(1);
    let r = _mm_mask_rolv_epi32(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54396
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_rolv_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let b = _mm_set1_epi32(1);
    let r = _mm_maskz_rolv_epi32(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54407
#[simd_test(enable = "avx512f")]
const fn test_mm512_rorv_epi32() {
    // Variable rotate right: per-lane count from `b`; bit 0 wraps to bit 31.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_rorv_epi32(a, b);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54416
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_rorv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane right by its count.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_mask_rorv_epi32(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54427
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_rorv_epi32() {
    // Zero mask yields all zeros; low-half mask rotates only the 8 low lanes
    // (the high lane's 3 is masked off to 0).
    let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_maskz_rorv_epi32(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    assert_eq_m512i(r, e);
}
54438
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_rorv_epi32() {
    // 256-bit variable rotate right by per-lane counts (all 1 here).
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_rorv_epi32(a, b);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54447
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_rorv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_mask_rorv_epi32(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54458
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_rorv_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_maskz_rorv_epi32(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54469
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_rorv_epi32() {
    // 128-bit variable rotate right by per-lane counts (all 1 here).
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let b = _mm_set1_epi32(1);
    let r = _mm_rorv_epi32(a, b);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54478
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_rorv_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let b = _mm_set1_epi32(1);
    let r = _mm_mask_rorv_epi32(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54489
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_rorv_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let b = _mm_set1_epi32(1);
    let r = _mm_maskz_rorv_epi32(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54500
#[simd_test(enable = "avx512f")]
const fn test_mm512_sllv_epi32() {
    // Variable shift left: per-lane count from `count`; 1 << 31 shifts out to 0.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let count = _mm512_set1_epi32(1);
    let r = _mm512_sllv_epi32(a, count);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54509
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_sllv_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane by its count.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let count = _mm512_set1_epi32(1);
    let r = _mm512_mask_sllv_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54520
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_sllv_epi32() {
    // Zero mask yields all zeros; low-half mask shifts the 8 low lanes
    // (lane 0's 1 << 31 shifts out to 0; the high lane's count-0 case is masked off).
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_maskz_sllv_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
    assert_eq_m512i(r, e);
}
54531
    // 256-bit write-masked variable left shift (AVX512VL): mask 0 passes `src` through.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sllv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54542
    // 256-bit zero-masked variable left shift: mask 0 yields all zeros.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sllv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_sllv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54553
    // 128-bit write-masked variable left shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sllv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54564
    // 128-bit zero-masked variable left shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sllv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_sllv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54575
    // Per-element variable logical right shift: each 2 becomes 1; 0 stays 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srlv_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_srlv_epi32(a, count);
        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54584
    // Write-masked variable logical right shift: mask 0 returns `src` unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srlv_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54595
    // Zero-masked variable logical right shift: only the low 8 lanes are kept;
    // the lowest lane is 0 >> 1 == 0, so seven lanes hold 1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srlv_epi32() {
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_maskz_srlv_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
        assert_eq_m512i(r, e);
    }
54606
    // 256-bit write-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srlv_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54617
    // 256-bit zero-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srlv_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_srlv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54628
    // 128-bit write-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srlv_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54639
    // 128-bit zero-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srlv_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_srlv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54650
    // Uniform left shift: every lane shifts by the scalar count held in the low
    // 64 bits of `count` (here 2); 1 << 31 shifted by 2 overflows to 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_sll_epi32(a, count);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 1 << 2, 1 << 3, 1 << 4,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
54671
    // Write-masked uniform left shift by the low-64-bit count (2): mask 0 passes
    // `src` through; a full mask shifts every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_sll_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 1 << 2, 1 << 3, 1 << 4,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
54694
    // Zero-masked uniform left shift. Only the low 64 bits of `count` matter, so the
    // effective shift is 2 even though element 3 is also 2. Both surviving nonzero
    // inputs (1 << 31 lanes) overflow to 0, giving an all-zero expected vector.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_sll_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54711
    // 256-bit write-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sll_epi32() {
        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_sll_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54722
    // 256-bit zero-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sll_epi32() {
        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_sll_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54733
    // 128-bit write-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sll_epi32() {
        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_sll_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54744
    // 128-bit zero-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sll_epi32() {
        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_sll_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54755
    // Uniform logical right shift by the low-64-bit count (2): 1 << 31 becomes
    // 1 << 29; 1 << 0 and 1 << 1 shift out to 0; 1 << 2 becomes 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_srl_epi32(a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54770
    // Write-masked uniform logical right shift by 2: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_srl_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54787
    // Zero-masked uniform logical right shift. The effective count comes from the low
    // 64 bits of `count` (2); only the low 8 lanes survive the mask, so the lowest
    // lane's 1 << 31 becomes 1 << 29 and the high half is zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_srl_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
        assert_eq_m512i(r, e);
    }
54804
    // 256-bit write-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_srl_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54815
    // 256-bit zero-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_srl_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54826
    // 128-bit write-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_srl_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54837
    // 128-bit zero-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_srl_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54848
    // Uniform arithmetic (sign-extending) right shift by the low-64-bit count (2):
    // -15 >> 2 == -4 (rounds toward -infinity), and the lowest lane 1 >> 2 == 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let count = _mm_set_epi32(1, 0, 0, 2);
        let r = _mm512_sra_epi32(a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54857
    // Write-masked uniform arithmetic right shift by 2: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_sra_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
        assert_eq_m512i(r, e);
    }
54868
    // Zero-masked uniform arithmetic right shift by 2 (count taken from the low
    // 64 bits): -15 >> 2 and -14 >> 2 both floor to -4; the high half is zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_sra_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
        assert_eq_m512i(r, e);
    }
54879
    // 256-bit write-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sra_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_sra_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54890
    // 256-bit zero-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sra_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_sra_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54901
    // 128-bit write-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sra_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_sra_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54912
    // 128-bit zero-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sra_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_sra_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54923
    // Per-element variable arithmetic right shift: the lowest lane shifts by 0 and
    // stays 1; -15 >> 2 floors to -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm512_srav_epi32(a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m512i(r, e);
    }
54932
    // Write-masked per-element arithmetic right shift: mask 0 passes `src` through;
    // the lowest lane is 16 >> 1 == 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let r = _mm512_mask_srav_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
54943
    // Zero-masked per-element arithmetic right shift: only the low 8 lanes survive;
    // -15 >> 2 and -14 >> 2 both floor to -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
        let r = _mm512_maskz_srav_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
        assert_eq_m512i(r, e);
    }
54954
    // 256-bit write-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_srav_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54965
    // 256-bit zero-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_srav_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54976
    // 128-bit write-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_srav_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54987
    // 128-bit zero-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_srav_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54998
    // Immediate arithmetic right shift by 2 (const generic): -15 >> 2 floors to -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
        let r = _mm512_srai_epi32::<2>(a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
        assert_eq_m512i(r, e);
    }
55006
    // Write-masked immediate arithmetic right shift by 2: 15 >> 2 == 3, -15 >> 2 == -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }
55016
    // Zero-masked immediate arithmetic right shift by 2: high 8 lanes zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_maskz_srai_epi32::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }
55026
    // 256-bit write-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
55036
    // 256-bit zero-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_maskz_srai_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
55046
    // 128-bit write-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
55056
    // 128-bit zero-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_maskz_srai_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
55066
    // Immediate per-lane shuffle: imm 0b11_11_11_11 broadcasts index 3 of each
    // 128-bit lane, so each group of four becomes its 4th element (3, 7, 11, 15).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }
55078
    // Write-masked per-lane shuffle: mask 0 passes `src` through; full mask
    // broadcasts index 3 of each 128-bit lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }
55092
    // Zero-masked per-lane shuffle: mask 0 zeroes everything; full mask broadcasts
    // index 3 of each 128-bit lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }
55106
    // 256-bit write-masked per-lane shuffle (AVX512VL). Note `set_ps` is high-to-low,
    // so lane index 3 picks 4. (low lane) and 0. (high lane).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }
55116
    // 256-bit zero-masked per-lane shuffle (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }
55126
    // 128-bit write-masked shuffle (AVX512VL): index 3 is element 0. with `set_ps`
    // high-to-low ordering, so the result broadcasts 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }
55136
    // 128-bit zero-masked shuffle (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }
55146
    // Full-width index permute: every idx lane is 1, and with `set_epi32`
    // high-to-low ordering element 1 of `a` holds 14, so the result broadcasts 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutevar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55155
    // Write-masked full-width index permute: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55166
    // Per-128-bit-lane variable select: index 1 picks the second element of each
    // lane (14., 10., 6., 2. with `set_ps` high-to-low ordering).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_permutevar_ps(a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55179
    // Write-masked per-lane variable select: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55194
    // Zero-masked per-lane variable select: only the low 8 elements are kept.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_maskz_permutevar_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55209
    // 256-bit write-masked per-lane variable select (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }
55220
    // 256-bit zero-masked per-lane variable select (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_maskz_permutevar_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }
55231
    // 128-bit write-masked variable select (AVX512VL): index 1 picks 2.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }
55242
    // 128-bit zero-masked variable select (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_maskz_permutevar_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }
55253
    // Full-width cross-lane permute: idx 1 selects element 1 of `a`, which holds 14
    // with `set_epi32` high-to-low ordering.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutexvar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55262
    // Write-masked cross-lane permute: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55273
    // Zero-masked cross-lane permute: only the low 8 lanes receive 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
        assert_eq_m512i(r, e);
    }
55284
    // 256-bit cross-lane permute (AVX512VL): element 1 of `a` holds 6.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_permutexvar_epi32(idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }
55293
    // 256-bit write-masked cross-lane permute (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }
55304
    // 256-bit zero-masked cross-lane permute (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }
55315
    // Cross-lane float permute: element 1 of `a` holds 14. (set_ps is high-to-low).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permutexvar_ps(idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }
55326
    // Write-masked cross-lane float permute: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }
55339
    // Zero-masked cross-lane float permute: only the low 8 lanes receive 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55354
    // 256-bit cross-lane float permute (AVX512VL): element 1 of `a` holds 6.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_permutexvar_ps(idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }
55363
55364 #[simd_test(enable = "avx512f,avx512vl")]
55365 fn test_mm256_mask_permutexvar_ps() {
55366 let idx = _mm256_set1_epi32(1);
55367 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
55368 let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
55369 assert_eq_m256(r, a);
55370 let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
55371 let e = _mm256_set1_ps(6.);
55372 assert_eq_m256(r, e);
55373 }
55374
55375 #[simd_test(enable = "avx512f,avx512vl")]
55376 fn test_mm256_maskz_permutexvar_ps() {
55377 let idx = _mm256_set1_epi32(1);
55378 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
55379 let r = _mm256_maskz_permutexvar_ps(0, idx, a);
55380 assert_eq_m256(r, _mm256_setzero_ps());
55381 let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
55382 let e = _mm256_set1_ps(6.);
55383 assert_eq_m256(r, e);
55384 }
55385
// `permutex2var` picks from the 32-element pool {a, b}: index bit 4 set
// (`1 << 4`) selects from `b`, otherwise from `a`. a[i] = 15 - i because
// `_mm512_set_epi32` lists the highest lane first, so idx 1 -> 14, idx 8 -> 7.
#[simd_test(enable = "avx512f")]
fn test_mm512_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_permutex2var_epi32(a, idx, b);
    let e = _mm512_set_epi32(
        14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

// Write-masked: mask 0 returns `a` (the `src` operand) unchanged.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
    let e = _mm512_set_epi32(
        14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8 (listed last by set_epi32),
// upper 8 elements are zeroed.
#[simd_test(enable = "avx512f")]
fn test_mm512_maskz_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
    assert_eq_m512i(r, e);
}

// `mask2` variant copies from `idx` (not `src`) where the mask bit is clear,
// so the out-of-range sentinel indices (1000, 2000, ...) survive verbatim in
// the unselected upper lanes. Only the low 4 bits of an index are used for
// the actual permute.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask2_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1000, 1 << 4, 2000, 1 << 4,
        3000, 1 << 4, 4000, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
    assert_eq_m512i(r, idx);
    let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi32(
        1000, 1 << 4, 2000, 1 << 4,
        3000, 1 << 4, 4000, 1 << 4,
        10, 100, 9, 100,
        8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

// 256-bit pool is 16 elements, so bit 3 (`1 << 3`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_permutex2var_epi32(a, idx, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 256-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 256-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_maskz_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 256-bit `mask2`: mask 0 returns `idx` itself.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask2_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
    assert_eq_m256i(r, idx);
    let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 128-bit pool is 8 elements, so bit 2 (`1 << 2`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_permutex2var_epi32(a, idx, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}

// 128-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}

// 128-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_maskz_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}

// 128-bit `mask2`: mask 0 returns `idx` itself.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask2_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
    assert_eq_m128i(r, idx);
    let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}
55557
// Float counterpart of the `permutex2var_epi32` tests above: index bit 4
// (`1 << 4`) selects from `b`; a[i] = 15.0 - i since set_ps lists the highest
// lane first, so idx 1 -> 14.0, idx 8 -> 7.0.
#[simd_test(enable = "avx512f")]
fn test_mm512_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_permutex2var_ps(a, idx, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `a` (the `src` operand) unchanged.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
fn test_mm512_maskz_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// `mask2`: lanes with a clear mask bit copy the raw `idx` bits, reinterpreted
// as floats — hence the bit-cast comparison against `_mm512_castsi512_ps(idx)`.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask2_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
    assert_eq_m512(r, _mm512_castsi512_ps(idx));
    let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// 256-bit pool is 16 elements, so bit 3 (`1 << 3`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_permutex2var_ps(a, idx, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 256-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
    assert_eq_m256(r, a);
    let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 256-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_maskz_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
    assert_eq_m256(r, _mm256_setzero_ps());
    let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 256-bit `mask2`: mask 0 returns `idx` bit-cast to floats.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask2_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
    assert_eq_m256(r, _mm256_castsi256_ps(idx));
    let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 128-bit pool is 8 elements, so bit 2 (`1 << 2`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_permutex2var_ps(a, idx, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}

// 128-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
    assert_eq_m128(r, a);
    let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}

// 128-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_maskz_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}

// 128-bit `mask2`: mask 0 returns `idx` bit-cast to floats.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask2_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
    assert_eq_m128(r, _mm_castsi128_ps(idx));
    let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}
55735
// `shuffle_epi32` permutes dwords independently within each 128-bit lane
// according to the `_MM_PERM_*` immediate (here AADD: elements 3,3,0,0 of
// each lane). These tests also exercise const evaluation (`const fn`).
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m512i(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m512i(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8 (listed first by setr),
// zeroes the rest.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_epi32() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
    let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m256i(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_epi32() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
    let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m256i(r, e);
}

// 128-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_shuffle_epi32() {
    let a = _mm_set_epi32(1, 4, 5, 8);
    let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
    let e = _mm_set_epi32(8, 8, 1, 1);
    assert_eq_m128i(r, e);
}

// 128-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_shuffle_epi32() {
    let a = _mm_set_epi32(1, 4, 5, 8);
    let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
    let e = _mm_set_epi32(8, 8, 1, 1);
    assert_eq_m128i(r, e);
}
55803
// `shuffle_ps` imm8 fields, per 128-bit lane: bits [1:0]/[3:2] pick dst lanes
// 0/1 from `a`, bits [5:4]/[7:6] pick dst lanes 2/3 from `b`.
// 0b00_00_11_11 -> a[3], a[3], b[0], b[0] in each lane.
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

// 256-bit write-masked variant; the mask-0 call uses a different immediate,
// which is fine since its shuffled result is discarded entirely.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_ps() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
    assert_eq_m256(r, a);
    let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
    let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
    assert_eq_m256(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_ps() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
    assert_eq_m256(r, _mm256_setzero_ps());
    let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
    let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
    assert_eq_m256(r, e);
}

// 128-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_shuffle_ps() {
    let a = _mm_set_ps(1., 4., 5., 8.);
    let b = _mm_set_ps(2., 3., 6., 7.);
    let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
    assert_eq_m128(r, a);
    let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
    let e = _mm_set_ps(7., 7., 1., 1.);
    assert_eq_m128(r, e);
}

// 128-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_shuffle_ps() {
    let a = _mm_set_ps(1., 4., 5., 8.);
    let b = _mm_set_ps(2., 3., 6., 7.);
    let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
    let e = _mm_set_ps(7., 7., 1., 1.);
    assert_eq_m128(r, e);
}
55896
// `shuffle_i32x4` permutes whole 128-bit lanes: the low half of the result is
// lanes selected from `a`, the high half lanes selected from `b`. With imm 0,
// every selection is lane 0 of the respective source.
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
    assert_eq_m512i(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
    assert_eq_m512i(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit form has two lanes: imm 0b00 takes lane 0 of `a` (low) and lane 0
// of `b` (high).
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_shuffle_i32x4() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm256_shuffle_i32x4::<0b00>(a, b);
    let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
    assert_eq_m256i(r, e);
}

// 256-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_i32x4() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
    let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
    assert_eq_m256i(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_i32x4() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
    let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
    assert_eq_m256i(r, e);
}
55958
// Float counterpart of `shuffle_i32x4`: permutes whole 128-bit lanes, low
// half from `a`, high half from `b`; imm 0 selects lane 0 everywhere.
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

// 256-bit form: imm 0b00 takes lane 0 of `a` (low) and lane 0 of `b` (high).
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_shuffle_f32x4() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_shuffle_f32x4::<0b00>(a, b);
    let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
    assert_eq_m256(r, e);
}

// 256-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_f32x4() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
    assert_eq_m256(r, a);
    let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
    let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
    assert_eq_m256(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_f32x4() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
    assert_eq_m256(r, _mm256_setzero_ps());
    let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
    let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
    assert_eq_m256(r, e);
}
56038
// `extractf32x4_ps::<1>` extracts 128-bit lane 1, i.e. elements 4..8
// (values 5..=8 with this setr input).
#[simd_test(enable = "avx512f")]
const fn test_mm512_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_extractf32x4_ps::<1>(a);
    let e = _mm_setr_ps(5., 6., 7., 8.);
    assert_eq_m128(r, e);
}

// Write-masked extract: mask 0 returns the 128-bit `src`; the mask applies
// per element of the extracted lane.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let src = _mm_set1_ps(100.);
    let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
    assert_eq_m128(r, src);
    let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
    let e = _mm_setr_ps(5., 6., 7., 8.);
    assert_eq_m128(r, e);
}

// Zero-masked extract: mask 0b0001 keeps only element 0 of the extracted lane.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
    let e = _mm_setr_ps(5., 0., 0., 0.);
    assert_eq_m128(r, e);
}

// 256-bit extract: lane 1 is the upper half; `_mm256_set_ps` lists the highest
// lane first, so the upper half is (1., 2., 3., 4.) in `set` order.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_extractf32x4_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let r = _mm256_extractf32x4_ps::<1>(a);
    let e = _mm_set_ps(1., 2., 3., 4.);
    assert_eq_m128(r, e);
}

// 256-bit write-masked extract.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_extractf32x4_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let src = _mm_set1_ps(100.);
    let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
    assert_eq_m128(r, src);
    let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
    let e = _mm_set_ps(1., 2., 3., 4.);
    assert_eq_m128(r, e);
}

// 256-bit zero-masked extract.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_extractf32x4_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
    let e = _mm_set_ps(1., 2., 3., 4.);
    assert_eq_m128(r, e);
}
56102
// Integer counterpart of `extractf32x4`: lane 1 holds elements 4..8
// (values 5..=8 with this setr input).
#[simd_test(enable = "avx512f")]
const fn test_mm512_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let r = _mm512_extracti32x4_epi32::<1>(a);
    let e = _mm_setr_epi32(5, 6, 7, 8);
    assert_eq_m128i(r, e);
}

// Write-masked extract: mask 0 returns the 128-bit `src` unchanged.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let src = _mm_set1_epi32(100);
    let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
    assert_eq_m128i(r, src);
    let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
    let e = _mm_setr_epi32(5, 6, 7, 8);
    assert_eq_m128i(r, e);
}
56121
56122 #[simd_test(enable = "avx512f,avx512vl")]
56123 const fn test_mm512_maskz_extracti32x4_epi32() {
56124 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56125 let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
56126 assert_eq_m128i(r, _mm_setzero_si128());
56127 let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
56128 let e = _mm_setr_epi32(5, 0, 0, 0);
56129 assert_eq_m128i(r, e);
56130 }
56131
// 256-bit integer extract: lane 1 is the upper half, i.e. (1, 2, 3, 4) in
// `set` order since `_mm256_set_epi32` lists the highest lane first.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_extracti32x4_epi32() {
    let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let r = _mm256_extracti32x4_epi32::<1>(a);
    let e = _mm_set_epi32(1, 2, 3, 4);
    assert_eq_m128i(r, e);
}

// 256-bit write-masked extract: mask 0 returns the 128-bit `src` unchanged.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_extracti32x4_epi32() {
    let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let src = _mm_set1_epi32(100);
    let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
    assert_eq_m128i(r, src);
    let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
    let e = _mm_set_epi32(1, 2, 3, 4);
    assert_eq_m128i(r, e);
}

// 256-bit zero-masked extract.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_extracti32x4_epi32() {
    let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1, 2, 3, 4);
    assert_eq_m128i(r, e);
}
56160
// `moveldup_ps` duplicates each even-indexed element into the following odd
// slot: dst[2i] = dst[2i+1] = src[2i].
#[simd_test(enable = "avx512f")]
const fn test_mm512_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_moveldup_ps(a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask duplicates all pairs.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_mask_moveldup_ps(a, 0, a);
    assert_eq_m512(r, a);
    let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_maskz_moveldup_ps(0, a);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

// 256-bit write-masked variant; with `set_ps` (high lane first) the even
// memory elements are 8., 6., 4., 2., hence the expected pattern.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_moveldup_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let r = _mm256_mask_moveldup_ps(a, 0, a);
    assert_eq_m256(r, a);
    let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
    let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
    assert_eq_m256(r, e);
}
56210
56211 #[simd_test(enable = "avx512f,avx512vl")]
56212 const fn test_mm256_maskz_moveldup_ps() {
56213 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56214 let r = _mm256_maskz_moveldup_ps(0, a);
56215 assert_eq_m256(r, _mm256_setzero_ps());
56216 let r = _mm256_maskz_moveldup_ps(0b11111111, a);
56217 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
56218 assert_eq_m256(r, e);
56219 }
56220
56221 #[simd_test(enable = "avx512f,avx512vl")]
56222 const fn test_mm_mask_moveldup_ps() {
56223 let a = _mm_set_ps(1., 2., 3., 4.);
56224 let r = _mm_mask_moveldup_ps(a, 0, a);
56225 assert_eq_m128(r, a);
56226 let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
56227 let e = _mm_set_ps(2., 2., 4., 4.);
56228 assert_eq_m128(r, e);
56229 }
56230
56231 #[simd_test(enable = "avx512f,avx512vl")]
56232 const fn test_mm_maskz_moveldup_ps() {
56233 let a = _mm_set_ps(1., 2., 3., 4.);
56234 let r = _mm_maskz_moveldup_ps(0, a);
56235 assert_eq_m128(r, _mm_setzero_ps());
56236 let r = _mm_maskz_moveldup_ps(0b00001111, a);
56237 let e = _mm_set_ps(2., 2., 4., 4.);
56238 assert_eq_m128(r, e);
56239 }
56240
56241 #[simd_test(enable = "avx512f")]
56242 const fn test_mm512_movehdup_ps() {
56243 let a = _mm512_setr_ps(
56244 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56245 );
56246 let r = _mm512_movehdup_ps(a);
56247 let e = _mm512_setr_ps(
56248 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
56249 );
56250 assert_eq_m512(r, e);
56251 }
56252
56253 #[simd_test(enable = "avx512f")]
56254 const fn test_mm512_mask_movehdup_ps() {
56255 let a = _mm512_setr_ps(
56256 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56257 );
56258 let r = _mm512_mask_movehdup_ps(a, 0, a);
56259 assert_eq_m512(r, a);
56260 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
56261 let e = _mm512_setr_ps(
56262 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
56263 );
56264 assert_eq_m512(r, e);
56265 }
56266
56267 #[simd_test(enable = "avx512f")]
56268 const fn test_mm512_maskz_movehdup_ps() {
56269 let a = _mm512_setr_ps(
56270 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56271 );
56272 let r = _mm512_maskz_movehdup_ps(0, a);
56273 assert_eq_m512(r, _mm512_setzero_ps());
56274 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
56275 let e = _mm512_setr_ps(
56276 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
56277 );
56278 assert_eq_m512(r, e);
56279 }
56280
56281 #[simd_test(enable = "avx512f,avx512vl")]
56282 const fn test_mm256_mask_movehdup_ps() {
56283 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56284 let r = _mm256_mask_movehdup_ps(a, 0, a);
56285 assert_eq_m256(r, a);
56286 let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
56287 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
56288 assert_eq_m256(r, e);
56289 }
56290
56291 #[simd_test(enable = "avx512f,avx512vl")]
56292 const fn test_mm256_maskz_movehdup_ps() {
56293 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56294 let r = _mm256_maskz_movehdup_ps(0, a);
56295 assert_eq_m256(r, _mm256_setzero_ps());
56296 let r = _mm256_maskz_movehdup_ps(0b11111111, a);
56297 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
56298 assert_eq_m256(r, e);
56299 }
56300
56301 #[simd_test(enable = "avx512f,avx512vl")]
56302 const fn test_mm_mask_movehdup_ps() {
56303 let a = _mm_set_ps(1., 2., 3., 4.);
56304 let r = _mm_mask_movehdup_ps(a, 0, a);
56305 assert_eq_m128(r, a);
56306 let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
56307 let e = _mm_set_ps(1., 1., 3., 3.);
56308 assert_eq_m128(r, e);
56309 }
56310
56311 #[simd_test(enable = "avx512f,avx512vl")]
56312 const fn test_mm_maskz_movehdup_ps() {
56313 let a = _mm_set_ps(1., 2., 3., 4.);
56314 let r = _mm_maskz_movehdup_ps(0, a);
56315 assert_eq_m128(r, _mm_setzero_ps());
56316 let r = _mm_maskz_movehdup_ps(0b00001111, a);
56317 let e = _mm_set_ps(1., 1., 3., 3.);
56318 assert_eq_m128(r, e);
56319 }
56320
56321 #[simd_test(enable = "avx512f")]
56322 const fn test_mm512_inserti32x4() {
56323 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56324 let b = _mm_setr_epi32(17, 18, 19, 20);
56325 let r = _mm512_inserti32x4::<0>(a, b);
56326 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56327 assert_eq_m512i(r, e);
56328 }
56329
56330 #[simd_test(enable = "avx512f")]
56331 const fn test_mm512_mask_inserti32x4() {
56332 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56333 let b = _mm_setr_epi32(17, 18, 19, 20);
56334 let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
56335 assert_eq_m512i(r, a);
56336 let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
56337 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56338 assert_eq_m512i(r, e);
56339 }
56340
56341 #[simd_test(enable = "avx512f")]
56342 const fn test_mm512_maskz_inserti32x4() {
56343 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56344 let b = _mm_setr_epi32(17, 18, 19, 20);
56345 let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
56346 assert_eq_m512i(r, _mm512_setzero_si512());
56347 let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
56348 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
56349 assert_eq_m512i(r, e);
56350 }
56351
56352 #[simd_test(enable = "avx512f,avx512vl")]
56353 const fn test_mm256_inserti32x4() {
56354 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56355 let b = _mm_set_epi32(17, 18, 19, 20);
56356 let r = _mm256_inserti32x4::<1>(a, b);
56357 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56358 assert_eq_m256i(r, e);
56359 }
56360
56361 #[simd_test(enable = "avx512f,avx512vl")]
56362 const fn test_mm256_mask_inserti32x4() {
56363 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56364 let b = _mm_set_epi32(17, 18, 19, 20);
56365 let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
56366 assert_eq_m256i(r, a);
56367 let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
56368 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56369 assert_eq_m256i(r, e);
56370 }
56371
56372 #[simd_test(enable = "avx512f,avx512vl")]
56373 const fn test_mm256_maskz_inserti32x4() {
56374 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56375 let b = _mm_set_epi32(17, 18, 19, 20);
56376 let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
56377 assert_eq_m256i(r, _mm256_setzero_si256());
56378 let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
56379 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56380 assert_eq_m256i(r, e);
56381 }
56382
56383 #[simd_test(enable = "avx512f")]
56384 const fn test_mm512_insertf32x4() {
56385 let a = _mm512_setr_ps(
56386 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56387 );
56388 let b = _mm_setr_ps(17., 18., 19., 20.);
56389 let r = _mm512_insertf32x4::<0>(a, b);
56390 let e = _mm512_setr_ps(
56391 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56392 );
56393 assert_eq_m512(r, e);
56394 }
56395
56396 #[simd_test(enable = "avx512f")]
56397 const fn test_mm512_mask_insertf32x4() {
56398 let a = _mm512_setr_ps(
56399 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56400 );
56401 let b = _mm_setr_ps(17., 18., 19., 20.);
56402 let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
56403 assert_eq_m512(r, a);
56404 let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
56405 let e = _mm512_setr_ps(
56406 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56407 );
56408 assert_eq_m512(r, e);
56409 }
56410
56411 #[simd_test(enable = "avx512f")]
56412 const fn test_mm512_maskz_insertf32x4() {
56413 let a = _mm512_setr_ps(
56414 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56415 );
56416 let b = _mm_setr_ps(17., 18., 19., 20.);
56417 let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
56418 assert_eq_m512(r, _mm512_setzero_ps());
56419 let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
56420 let e = _mm512_setr_ps(
56421 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
56422 );
56423 assert_eq_m512(r, e);
56424 }
56425
56426 #[simd_test(enable = "avx512f,avx512vl")]
56427 const fn test_mm256_insertf32x4() {
56428 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56429 let b = _mm_set_ps(17., 18., 19., 20.);
56430 let r = _mm256_insertf32x4::<1>(a, b);
56431 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56432 assert_eq_m256(r, e);
56433 }
56434
56435 #[simd_test(enable = "avx512f,avx512vl")]
56436 const fn test_mm256_mask_insertf32x4() {
56437 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56438 let b = _mm_set_ps(17., 18., 19., 20.);
56439 let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
56440 assert_eq_m256(r, a);
56441 let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
56442 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56443 assert_eq_m256(r, e);
56444 }
56445
56446 #[simd_test(enable = "avx512f,avx512vl")]
56447 const fn test_mm256_maskz_insertf32x4() {
56448 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56449 let b = _mm_set_ps(17., 18., 19., 20.);
56450 let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
56451 assert_eq_m256(r, _mm256_setzero_ps());
56452 let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
56453 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56454 assert_eq_m256(r, e);
56455 }
56456
56457 #[simd_test(enable = "avx512f")]
56458 const fn test_mm512_castps128_ps512() {
56459 let a = _mm_setr_ps(17., 18., 19., 20.);
56460 let r = _mm512_castps128_ps512(a);
56461 assert_eq_m128(_mm512_castps512_ps128(r), a);
56462 }
56463
56464 #[simd_test(enable = "avx512f")]
56465 const fn test_mm512_castps256_ps512() {
56466 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56467 let r = _mm512_castps256_ps512(a);
56468 assert_eq_m256(_mm512_castps512_ps256(r), a);
56469 }
56470
56471 #[simd_test(enable = "avx512f")]
56472 const fn test_mm512_zextps128_ps512() {
56473 let a = _mm_setr_ps(17., 18., 19., 20.);
56474 let r = _mm512_zextps128_ps512(a);
56475 let e = _mm512_setr_ps(
56476 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
56477 );
56478 assert_eq_m512(r, e);
56479 }
56480
56481 #[simd_test(enable = "avx512f")]
56482 const fn test_mm512_zextps256_ps512() {
56483 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56484 let r = _mm512_zextps256_ps512(a);
56485 let e = _mm512_setr_ps(
56486 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
56487 );
56488 assert_eq_m512(r, e);
56489 }
56490
56491 #[simd_test(enable = "avx512f")]
56492 const fn test_mm512_castps512_ps128() {
56493 let a = _mm512_setr_ps(
56494 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
56495 );
56496 let r = _mm512_castps512_ps128(a);
56497 let e = _mm_setr_ps(17., 18., 19., 20.);
56498 assert_eq_m128(r, e);
56499 }
56500
56501 #[simd_test(enable = "avx512f")]
56502 const fn test_mm512_castps512_ps256() {
56503 let a = _mm512_setr_ps(
56504 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
56505 );
56506 let r = _mm512_castps512_ps256(a);
56507 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56508 assert_eq_m256(r, e);
56509 }
56510
56511 #[simd_test(enable = "avx512f")]
56512 const fn test_mm512_castps_pd() {
56513 let a = _mm512_set1_ps(1.);
56514 let r = _mm512_castps_pd(a);
56515 let e = _mm512_set1_pd(0.007812501848093234);
56516 assert_eq_m512d(r, e);
56517 }
56518
56519 #[simd_test(enable = "avx512f")]
56520 const fn test_mm512_castps_si512() {
56521 let a = _mm512_set1_ps(1.);
56522 let r = _mm512_castps_si512(a);
56523 let e = _mm512_set1_epi32(1065353216);
56524 assert_eq_m512i(r, e);
56525 }
56526
56527 #[simd_test(enable = "avx512f")]
56528 const fn test_mm512_broadcastd_epi32() {
56529 let a = _mm_set_epi32(17, 18, 19, 20);
56530 let r = _mm512_broadcastd_epi32(a);
56531 let e = _mm512_set1_epi32(20);
56532 assert_eq_m512i(r, e);
56533 }
56534
56535 #[simd_test(enable = "avx512f")]
56536 const fn test_mm512_mask_broadcastd_epi32() {
56537 let src = _mm512_set1_epi32(20);
56538 let a = _mm_set_epi32(17, 18, 19, 20);
56539 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
56540 assert_eq_m512i(r, src);
56541 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
56542 let e = _mm512_set1_epi32(20);
56543 assert_eq_m512i(r, e);
56544 }
56545
56546 #[simd_test(enable = "avx512f")]
56547 const fn test_mm512_maskz_broadcastd_epi32() {
56548 let a = _mm_set_epi32(17, 18, 19, 20);
56549 let r = _mm512_maskz_broadcastd_epi32(0, a);
56550 assert_eq_m512i(r, _mm512_setzero_si512());
56551 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
56552 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
56553 assert_eq_m512i(r, e);
56554 }
56555
56556 #[simd_test(enable = "avx512f,avx512vl")]
56557 const fn test_mm256_mask_broadcastd_epi32() {
56558 let src = _mm256_set1_epi32(20);
56559 let a = _mm_set_epi32(17, 18, 19, 20);
56560 let r = _mm256_mask_broadcastd_epi32(src, 0, a);
56561 assert_eq_m256i(r, src);
56562 let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
56563 let e = _mm256_set1_epi32(20);
56564 assert_eq_m256i(r, e);
56565 }
56566
56567 #[simd_test(enable = "avx512f,avx512vl")]
56568 const fn test_mm256_maskz_broadcastd_epi32() {
56569 let a = _mm_set_epi32(17, 18, 19, 20);
56570 let r = _mm256_maskz_broadcastd_epi32(0, a);
56571 assert_eq_m256i(r, _mm256_setzero_si256());
56572 let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
56573 let e = _mm256_set1_epi32(20);
56574 assert_eq_m256i(r, e);
56575 }
56576
56577 #[simd_test(enable = "avx512f,avx512vl")]
56578 const fn test_mm_mask_broadcastd_epi32() {
56579 let src = _mm_set1_epi32(20);
56580 let a = _mm_set_epi32(17, 18, 19, 20);
56581 let r = _mm_mask_broadcastd_epi32(src, 0, a);
56582 assert_eq_m128i(r, src);
56583 let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
56584 let e = _mm_set1_epi32(20);
56585 assert_eq_m128i(r, e);
56586 }
56587
56588 #[simd_test(enable = "avx512f,avx512vl")]
56589 const fn test_mm_maskz_broadcastd_epi32() {
56590 let a = _mm_set_epi32(17, 18, 19, 20);
56591 let r = _mm_maskz_broadcastd_epi32(0, a);
56592 assert_eq_m128i(r, _mm_setzero_si128());
56593 let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
56594 let e = _mm_set1_epi32(20);
56595 assert_eq_m128i(r, e);
56596 }
56597
56598 #[simd_test(enable = "avx512f")]
56599 const fn test_mm512_broadcastss_ps() {
56600 let a = _mm_set_ps(17., 18., 19., 20.);
56601 let r = _mm512_broadcastss_ps(a);
56602 let e = _mm512_set1_ps(20.);
56603 assert_eq_m512(r, e);
56604 }
56605
56606 #[simd_test(enable = "avx512f")]
56607 const fn test_mm512_mask_broadcastss_ps() {
56608 let src = _mm512_set1_ps(20.);
56609 let a = _mm_set_ps(17., 18., 19., 20.);
56610 let r = _mm512_mask_broadcastss_ps(src, 0, a);
56611 assert_eq_m512(r, src);
56612 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
56613 let e = _mm512_set1_ps(20.);
56614 assert_eq_m512(r, e);
56615 }
56616
56617 #[simd_test(enable = "avx512f")]
56618 const fn test_mm512_maskz_broadcastss_ps() {
56619 let a = _mm_set_ps(17., 18., 19., 20.);
56620 let r = _mm512_maskz_broadcastss_ps(0, a);
56621 assert_eq_m512(r, _mm512_setzero_ps());
56622 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
56623 let e = _mm512_setr_ps(
56624 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
56625 );
56626 assert_eq_m512(r, e);
56627 }
56628
56629 #[simd_test(enable = "avx512f,avx512vl")]
56630 const fn test_mm256_mask_broadcastss_ps() {
56631 let src = _mm256_set1_ps(20.);
56632 let a = _mm_set_ps(17., 18., 19., 20.);
56633 let r = _mm256_mask_broadcastss_ps(src, 0, a);
56634 assert_eq_m256(r, src);
56635 let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
56636 let e = _mm256_set1_ps(20.);
56637 assert_eq_m256(r, e);
56638 }
56639
56640 #[simd_test(enable = "avx512f,avx512vl")]
56641 const fn test_mm256_maskz_broadcastss_ps() {
56642 let a = _mm_set_ps(17., 18., 19., 20.);
56643 let r = _mm256_maskz_broadcastss_ps(0, a);
56644 assert_eq_m256(r, _mm256_setzero_ps());
56645 let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
56646 let e = _mm256_set1_ps(20.);
56647 assert_eq_m256(r, e);
56648 }
56649
56650 #[simd_test(enable = "avx512f,avx512vl")]
56651 const fn test_mm_mask_broadcastss_ps() {
56652 let src = _mm_set1_ps(20.);
56653 let a = _mm_set_ps(17., 18., 19., 20.);
56654 let r = _mm_mask_broadcastss_ps(src, 0, a);
56655 assert_eq_m128(r, src);
56656 let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
56657 let e = _mm_set1_ps(20.);
56658 assert_eq_m128(r, e);
56659 }
56660
56661 #[simd_test(enable = "avx512f,avx512vl")]
56662 const fn test_mm_maskz_broadcastss_ps() {
56663 let a = _mm_set_ps(17., 18., 19., 20.);
56664 let r = _mm_maskz_broadcastss_ps(0, a);
56665 assert_eq_m128(r, _mm_setzero_ps());
56666 let r = _mm_maskz_broadcastss_ps(0b00001111, a);
56667 let e = _mm_set1_ps(20.);
56668 assert_eq_m128(r, e);
56669 }
56670
56671 #[simd_test(enable = "avx512f")]
56672 const fn test_mm512_broadcast_i32x4() {
56673 let a = _mm_set_epi32(17, 18, 19, 20);
56674 let r = _mm512_broadcast_i32x4(a);
56675 let e = _mm512_set_epi32(
56676 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
56677 );
56678 assert_eq_m512i(r, e);
56679 }
56680
56681 #[simd_test(enable = "avx512f")]
56682 const fn test_mm512_mask_broadcast_i32x4() {
56683 let src = _mm512_set1_epi32(20);
56684 let a = _mm_set_epi32(17, 18, 19, 20);
56685 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
56686 assert_eq_m512i(r, src);
56687 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
56688 let e = _mm512_set_epi32(
56689 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
56690 );
56691 assert_eq_m512i(r, e);
56692 }
56693
56694 #[simd_test(enable = "avx512f")]
56695 const fn test_mm512_maskz_broadcast_i32x4() {
56696 let a = _mm_set_epi32(17, 18, 19, 20);
56697 let r = _mm512_maskz_broadcast_i32x4(0, a);
56698 assert_eq_m512i(r, _mm512_setzero_si512());
56699 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
56700 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
56701 assert_eq_m512i(r, e);
56702 }
56703
56704 #[simd_test(enable = "avx512f,avx512vl")]
56705 const fn test_mm256_broadcast_i32x4() {
56706 let a = _mm_set_epi32(17, 18, 19, 20);
56707 let r = _mm256_broadcast_i32x4(a);
56708 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56709 assert_eq_m256i(r, e);
56710 }
56711
56712 #[simd_test(enable = "avx512f,avx512vl")]
56713 const fn test_mm256_mask_broadcast_i32x4() {
56714 let src = _mm256_set1_epi32(20);
56715 let a = _mm_set_epi32(17, 18, 19, 20);
56716 let r = _mm256_mask_broadcast_i32x4(src, 0, a);
56717 assert_eq_m256i(r, src);
56718 let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
56719 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56720 assert_eq_m256i(r, e);
56721 }
56722
56723 #[simd_test(enable = "avx512f,avx512vl")]
56724 const fn test_mm256_maskz_broadcast_i32x4() {
56725 let a = _mm_set_epi32(17, 18, 19, 20);
56726 let r = _mm256_maskz_broadcast_i32x4(0, a);
56727 assert_eq_m256i(r, _mm256_setzero_si256());
56728 let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
56729 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56730 assert_eq_m256i(r, e);
56731 }
56732
56733 #[simd_test(enable = "avx512f")]
56734 const fn test_mm512_broadcast_f32x4() {
56735 let a = _mm_set_ps(17., 18., 19., 20.);
56736 let r = _mm512_broadcast_f32x4(a);
56737 let e = _mm512_set_ps(
56738 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
56739 );
56740 assert_eq_m512(r, e);
56741 }
56742
56743 #[simd_test(enable = "avx512f")]
56744 const fn test_mm512_mask_broadcast_f32x4() {
56745 let src = _mm512_set1_ps(20.);
56746 let a = _mm_set_ps(17., 18., 19., 20.);
56747 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
56748 assert_eq_m512(r, src);
56749 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
56750 let e = _mm512_set_ps(
56751 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
56752 );
56753 assert_eq_m512(r, e);
56754 }
56755
56756 #[simd_test(enable = "avx512f")]
56757 const fn test_mm512_maskz_broadcast_f32x4() {
56758 let a = _mm_set_ps(17., 18., 19., 20.);
56759 let r = _mm512_maskz_broadcast_f32x4(0, a);
56760 assert_eq_m512(r, _mm512_setzero_ps());
56761 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
56762 let e = _mm512_set_ps(
56763 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
56764 );
56765 assert_eq_m512(r, e);
56766 }
56767
56768 #[simd_test(enable = "avx512f,avx512vl")]
56769 const fn test_mm256_broadcast_f32x4() {
56770 let a = _mm_set_ps(17., 18., 19., 20.);
56771 let r = _mm256_broadcast_f32x4(a);
56772 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56773 assert_eq_m256(r, e);
56774 }
56775
56776 #[simd_test(enable = "avx512f,avx512vl")]
56777 const fn test_mm256_mask_broadcast_f32x4() {
56778 let src = _mm256_set1_ps(20.);
56779 let a = _mm_set_ps(17., 18., 19., 20.);
56780 let r = _mm256_mask_broadcast_f32x4(src, 0, a);
56781 assert_eq_m256(r, src);
56782 let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
56783 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56784 assert_eq_m256(r, e);
56785 }
56786
56787 #[simd_test(enable = "avx512f,avx512vl")]
56788 const fn test_mm256_maskz_broadcast_f32x4() {
56789 let a = _mm_set_ps(17., 18., 19., 20.);
56790 let r = _mm256_maskz_broadcast_f32x4(0, a);
56791 assert_eq_m256(r, _mm256_setzero_ps());
56792 let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
56793 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56794 assert_eq_m256(r, e);
56795 }
56796
56797 #[simd_test(enable = "avx512f")]
56798 const fn test_mm512_mask_blend_epi32() {
56799 let a = _mm512_set1_epi32(1);
56800 let b = _mm512_set1_epi32(2);
56801 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
56802 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
56803 assert_eq_m512i(r, e);
56804 }
56805
56806 #[simd_test(enable = "avx512f,avx512vl")]
56807 const fn test_mm256_mask_blend_epi32() {
56808 let a = _mm256_set1_epi32(1);
56809 let b = _mm256_set1_epi32(2);
56810 let r = _mm256_mask_blend_epi32(0b11111111, a, b);
56811 let e = _mm256_set1_epi32(2);
56812 assert_eq_m256i(r, e);
56813 }
56814
56815 #[simd_test(enable = "avx512f,avx512vl")]
56816 const fn test_mm_mask_blend_epi32() {
56817 let a = _mm_set1_epi32(1);
56818 let b = _mm_set1_epi32(2);
56819 let r = _mm_mask_blend_epi32(0b00001111, a, b);
56820 let e = _mm_set1_epi32(2);
56821 assert_eq_m128i(r, e);
56822 }
56823
56824 #[simd_test(enable = "avx512f")]
56825 const fn test_mm512_mask_blend_ps() {
56826 let a = _mm512_set1_ps(1.);
56827 let b = _mm512_set1_ps(2.);
56828 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
56829 let e = _mm512_set_ps(
56830 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
56831 );
56832 assert_eq_m512(r, e);
56833 }
56834
56835 #[simd_test(enable = "avx512f,avx512vl")]
56836 const fn test_mm256_mask_blend_ps() {
56837 let a = _mm256_set1_ps(1.);
56838 let b = _mm256_set1_ps(2.);
56839 let r = _mm256_mask_blend_ps(0b11111111, a, b);
56840 let e = _mm256_set1_ps(2.);
56841 assert_eq_m256(r, e);
56842 }
56843
56844 #[simd_test(enable = "avx512f,avx512vl")]
56845 const fn test_mm_mask_blend_ps() {
56846 let a = _mm_set1_ps(1.);
56847 let b = _mm_set1_ps(2.);
56848 let r = _mm_mask_blend_ps(0b00001111, a, b);
56849 let e = _mm_set1_ps(2.);
56850 assert_eq_m128(r, e);
56851 }
56852
56853 #[simd_test(enable = "avx512f")]
56854 const fn test_mm512_unpackhi_epi32() {
56855 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56856 let b = _mm512_set_epi32(
56857 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
56858 );
56859 let r = _mm512_unpackhi_epi32(a, b);
56860 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
56861 assert_eq_m512i(r, e);
56862 }
56863
56864 #[simd_test(enable = "avx512f")]
56865 const fn test_mm512_mask_unpackhi_epi32() {
56866 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56867 let b = _mm512_set_epi32(
56868 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
56869 );
56870 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
56871 assert_eq_m512i(r, a);
56872 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
56873 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
56874 assert_eq_m512i(r, e);
56875 }
56876
56877 #[simd_test(enable = "avx512f")]
56878 const fn test_mm512_maskz_unpackhi_epi32() {
56879 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56880 let b = _mm512_set_epi32(
56881 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
56882 );
56883 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
56884 assert_eq_m512i(r, _mm512_setzero_si512());
56885 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
56886 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
56887 assert_eq_m512i(r, e);
56888 }
56889
56890 #[simd_test(enable = "avx512f,avx512vl")]
56891 const fn test_mm256_mask_unpackhi_epi32() {
56892 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56893 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
56894 let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
56895 assert_eq_m256i(r, a);
56896 let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
56897 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
56898 assert_eq_m256i(r, e);
56899 }
56900
56901 #[simd_test(enable = "avx512f,avx512vl")]
56902 const fn test_mm256_maskz_unpackhi_epi32() {
56903 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56904 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
56905 let r = _mm256_maskz_unpackhi_epi32(0, a, b);
56906 assert_eq_m256i(r, _mm256_setzero_si256());
56907 let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
56908 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
56909 assert_eq_m256i(r, e);
56910 }
56911
56912 #[simd_test(enable = "avx512f,avx512vl")]
56913 const fn test_mm_mask_unpackhi_epi32() {
56914 let a = _mm_set_epi32(1, 2, 3, 4);
56915 let b = _mm_set_epi32(17, 18, 19, 20);
56916 let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
56917 assert_eq_m128i(r, a);
56918 let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
56919 let e = _mm_set_epi32(17, 1, 18, 2);
56920 assert_eq_m128i(r, e);
56921 }
56922
56923 #[simd_test(enable = "avx512f,avx512vl")]
56924 const fn test_mm_maskz_unpackhi_epi32() {
56925 let a = _mm_set_epi32(1, 2, 3, 4);
56926 let b = _mm_set_epi32(17, 18, 19, 20);
56927 let r = _mm_maskz_unpackhi_epi32(0, a, b);
56928 assert_eq_m128i(r, _mm_setzero_si128());
56929 let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
56930 let e = _mm_set_epi32(17, 1, 18, 2);
56931 assert_eq_m128i(r, e);
56932 }
56933
56934 #[simd_test(enable = "avx512f")]
56935 const fn test_mm512_unpackhi_ps() {
56936 let a = _mm512_set_ps(
56937 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56938 );
56939 let b = _mm512_set_ps(
56940 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
56941 );
56942 let r = _mm512_unpackhi_ps(a, b);
56943 let e = _mm512_set_ps(
56944 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
56945 );
56946 assert_eq_m512(r, e);
56947 }
56948
56949 #[simd_test(enable = "avx512f")]
56950 const fn test_mm512_mask_unpackhi_ps() {
56951 let a = _mm512_set_ps(
56952 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56953 );
56954 let b = _mm512_set_ps(
56955 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
56956 );
56957 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
56958 assert_eq_m512(r, a);
56959 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
56960 let e = _mm512_set_ps(
56961 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
56962 );
56963 assert_eq_m512(r, e);
56964 }
56965
    // Checks _mm512_maskz_unpackhi_ps: a zero mask must produce all zeros, and a
    // low-half mask (0b00000000_11111111) must keep the interleave-high result in
    // the low 8 lanes while zeroing the upper 8. Note _mm512_set_ps lists lanes
    // highest-first, so the trailing arguments are the low (mask-selected) lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpackhi_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    // Checks the AVX512VL 256-bit masked variant: mask 0 copies `src` (here `a`)
    // unchanged; an all-ones mask yields the full unpackhi interleave of b/a pairs
    // within each 128-bit half.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    // Checks the AVX512VL 256-bit zeroing variant: mask 0 gives all zeros; an
    // all-ones mask gives the plain unpackhi result.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpackhi_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    // Checks the AVX512VL 128-bit masked variant (4 lanes, mask 0b00001111 = all
    // lanes active): interleaves the high pair of b and a.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }

    // Checks the AVX512VL 128-bit zeroing variant: mask 0 zeros everything; a
    // full mask reproduces the plain unpackhi result.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpackhi_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }
57026
    // Checks _mm512_unpacklo_epi32 (vpunpckldq): within each 128-bit lane, the
    // low two 32-bit elements of `b` and `a` are interleaved. With
    // _mm512_set_epi32 listing lanes highest-first, the expected interleave is
    // (b, a) pairs: 19,3, 20,4, ... per lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpacklo_epi32(a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // Checks the merge-masking variant: mask 0 returns `src` (`a`) untouched; an
    // all-ones 16-bit mask returns the full unpacklo result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // Checks the zero-masking variant: mask 0 yields all zeros; a low-half mask
    // keeps the low 8 lanes of the unpacklo result and zeros the upper 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) merge-masking variant of the same interleave.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) merge-masking variant; 0b00001111 activates all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }
57107
    // Checks _mm512_unpacklo_ps (vunpcklps): per 128-bit lane, interleaves the
    // low two floats of `b` and `a` — the float analogue of unpacklo_epi32 with
    // the same (b, a) pair ordering in the expected vector.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpacklo_ps(a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // Merge-masking variant: mask 0 returns `src` (`a`); all-ones mask returns
    // the full interleave.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masking variant: mask 0 is all zeros; a low-half mask keeps the low 8
    // result lanes (the trailing set_ps arguments) and zeros the upper 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpacklo_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit (AVX512VL) merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    // 256-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpacklo_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    // 128-bit (AVX512VL) merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }

    // 128-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpacklo_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }
57200
    // Checks _mm512_alignr_epi32 (valignd): concatenates a:b into a 32-element
    // pool and right-shifts by IMM8 elements. Shift 0 returns `b` unchanged;
    // shift 16 (element count of one vector) wraps back to `b` as well; shift 1
    // drops b's lowest element and pulls in a's lowest (value 1) at the top.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_alignr_epi32::<0>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<16>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<1>(a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    // Merge-masking variant: mask 0 returns `src` (`a`); full mask returns the
    // shift-by-1 result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking variant: mask 0 gives all zeros; a low-half mask keeps only
    // the low 8 lanes of the shift-by-1 result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) variant: an 8+8 element pool, same shift semantics.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_alignr_epi32::<0>(a, b);
        assert_eq_m256i(r, b);
        let r = _mm256_alignr_epi32::<1>(a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 256-bit merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) variant: a 4+4 element pool.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_alignr_epi32::<0>(a, b);
        assert_eq_m128i(r, b);
        let r = _mm_alignr_epi32::<1>(a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }
57311
    // Checks _mm512_and_epi32: lanewise AND. Only the top and bottom lanes carry
    // bits; (1<<1|1<<2) & (1<<1) = 1<<1 and (1<<1|1<<3) & (1<<3|1<<4) = 1<<3.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_epi32(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    // Merge-masking AND: mask 0 returns `src` (`a`). The 0b01111111_11111111 mask
    // leaves the top lane (bit 15 clear) copied from `a`, so the expected top
    // lane is a's 1<<1|1<<2 rather than the AND result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_and_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking AND: mask 0 is all zeros; a low-half mask keeps only the low
    // 8 lanes of the AND, so the top lane result is dropped to 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_and_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) merge-masking AND on splatted operands.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_and_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    // 256-bit (AVX512VL) zero-masking AND.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_and_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) merge-masking AND.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_and_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    // 128-bit (AVX512VL) zero-masking AND.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_and_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_and_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    // Checks the whole-register alias _mm512_and_si512 — same operands and
    // expected result as test_mm512_and_epi32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_si512(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }
57449
    // Checks _mm512_or_epi32: lanewise OR. Top lane: (1<<1|1<<2)|(1<<1) stays
    // 1<<1|1<<2; bottom lane: (1<<1|1<<3)|(1<<3|1<<4) = 1<<1|1<<3|1<<4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Merge-masking OR: mask 0 returns `src` (`a`); full mask gives the OR of
    // every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_or_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking OR: mask 0 zeros everything; a low-half mask zeros the top
    // lane (bit 15 clear) so only the bottom lane's OR survives.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_or_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) plain OR on splatted operands.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_or_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit merge-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_or_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_or_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) plain OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_or_epi32(a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_or_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_or_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_or_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    // Checks the whole-register alias _mm512_or_si512 — same operands and
    // expected result as test_mm512_or_epi32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_or_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57623
    // Checks _mm512_xor_epi32: lanewise XOR. Top lane: (1<<1|1<<2)^(1<<1) = 1<<2;
    // bottom lane: (1<<1|1<<3)^(1<<3|1<<4) = 1<<1|1<<4 (the shared 1<<3 cancels).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Merge-masking XOR: mask 0 returns `src` (`a`). With bit 15 clear in
    // 0b01111111_11111111, the top lane is copied from `a` (1<<1|1<<2) instead
    // of taking the XOR result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_xor_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking XOR: mask 0 zeros everything; a low-half mask keeps only the
    // bottom lane's XOR result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_xor_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) plain XOR on splatted operands.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_xor_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit merge-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_xor_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_xor_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) plain XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_xor_epi32(a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_xor_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_xor_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // Checks the whole-register alias _mm512_xor_si512 — same operands and
    // expected result as test_mm512_xor_epi32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_xor_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57791
    // Checks _mm512_andnot_epi32: computes (!a) & b. With a = 0, !a is all ones,
    // so the result is simply b.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_andnot_epi32() {
        let a = _mm512_set1_epi32(0);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_andnot_epi32(a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // Merge-masking ANDNOT: mask 0 returns `src` (`a`). With disjoint bit sets
    // (a has bits 1,2; b has bits 3,4), (!a) & b = b in every active lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // Zero-masking ANDNOT: mask 0 zeros everything; a low-half mask keeps
    // (!a) & b = 1<<3|1<<4 in the low 8 lanes and zeros the upper 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_maskz_andnot_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) merge-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit (AVX512VL) zero-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_maskz_andnot_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) merge-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit (AVX512VL) zero-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_maskz_andnot_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }
57872
57873 #[simd_test(enable = "avx512f")]
57874 const fn test_cvtmask16_u32() {
57875 let a: __mmask16 = 0b11001100_00110011;
57876 let r = _cvtmask16_u32(a);
57877 let e: u32 = 0b11001100_00110011;
57878 assert_eq!(r, e);
57879 }
57880
57881 #[simd_test(enable = "avx512f")]
57882 const fn test_cvtu32_mask16() {
57883 let a: u32 = 0b11001100_00110011;
57884 let r = _cvtu32_mask16(a);
57885 let e: __mmask16 = 0b11001100_00110011;
57886 assert_eq!(r, e);
57887 }
57888
57889 #[simd_test(enable = "avx512f")]
57890 const fn test_mm512_kand() {
57891 let a: u16 = 0b11001100_00110011;
57892 let b: u16 = 0b11001100_00110011;
57893 let r = _mm512_kand(a, b);
57894 let e: u16 = 0b11001100_00110011;
57895 assert_eq!(r, e);
57896 }
57897
57898 #[simd_test(enable = "avx512f")]
57899 const fn test_kand_mask16() {
57900 let a: u16 = 0b11001100_00110011;
57901 let b: u16 = 0b11001100_00110011;
57902 let r = _kand_mask16(a, b);
57903 let e: u16 = 0b11001100_00110011;
57904 assert_eq!(r, e);
57905 }
57906
57907 #[simd_test(enable = "avx512f")]
57908 const fn test_mm512_kor() {
57909 let a: u16 = 0b11001100_00110011;
57910 let b: u16 = 0b00101110_00001011;
57911 let r = _mm512_kor(a, b);
57912 let e: u16 = 0b11101110_00111011;
57913 assert_eq!(r, e);
57914 }
57915
57916 #[simd_test(enable = "avx512f")]
57917 const fn test_kor_mask16() {
57918 let a: u16 = 0b11001100_00110011;
57919 let b: u16 = 0b00101110_00001011;
57920 let r = _kor_mask16(a, b);
57921 let e: u16 = 0b11101110_00111011;
57922 assert_eq!(r, e);
57923 }
57924
57925 #[simd_test(enable = "avx512f")]
57926 const fn test_mm512_kxor() {
57927 let a: u16 = 0b11001100_00110011;
57928 let b: u16 = 0b00101110_00001011;
57929 let r = _mm512_kxor(a, b);
57930 let e: u16 = 0b11100010_00111000;
57931 assert_eq!(r, e);
57932 }
57933
57934 #[simd_test(enable = "avx512f")]
57935 const fn test_kxor_mask16() {
57936 let a: u16 = 0b11001100_00110011;
57937 let b: u16 = 0b00101110_00001011;
57938 let r = _kxor_mask16(a, b);
57939 let e: u16 = 0b11100010_00111000;
57940 assert_eq!(r, e);
57941 }
57942
57943 #[simd_test(enable = "avx512f")]
57944 const fn test_mm512_knot() {
57945 let a: u16 = 0b11001100_00110011;
57946 let r = _mm512_knot(a);
57947 let e: u16 = 0b00110011_11001100;
57948 assert_eq!(r, e);
57949 }
57950
57951 #[simd_test(enable = "avx512f")]
57952 const fn test_knot_mask16() {
57953 let a: u16 = 0b11001100_00110011;
57954 let r = _knot_mask16(a);
57955 let e: u16 = 0b00110011_11001100;
57956 assert_eq!(r, e);
57957 }
57958
57959 #[simd_test(enable = "avx512f")]
57960 const fn test_mm512_kandn() {
57961 let a: u16 = 0b11001100_00110011;
57962 let b: u16 = 0b00101110_00001011;
57963 let r = _mm512_kandn(a, b);
57964 let e: u16 = 0b00100010_00001000;
57965 assert_eq!(r, e);
57966 }
57967
57968 #[simd_test(enable = "avx512f")]
57969 const fn test_kandn_mask16() {
57970 let a: u16 = 0b11001100_00110011;
57971 let b: u16 = 0b00101110_00001011;
57972 let r = _kandn_mask16(a, b);
57973 let e: u16 = 0b00100010_00001000;
57974 assert_eq!(r, e);
57975 }
57976
57977 #[simd_test(enable = "avx512f")]
57978 const fn test_mm512_kxnor() {
57979 let a: u16 = 0b11001100_00110011;
57980 let b: u16 = 0b00101110_00001011;
57981 let r = _mm512_kxnor(a, b);
57982 let e: u16 = 0b00011101_11000111;
57983 assert_eq!(r, e);
57984 }
57985
57986 #[simd_test(enable = "avx512f")]
57987 const fn test_kxnor_mask16() {
57988 let a: u16 = 0b11001100_00110011;
57989 let b: u16 = 0b00101110_00001011;
57990 let r = _kxnor_mask16(a, b);
57991 let e: u16 = 0b00011101_11000111;
57992 assert_eq!(r, e);
57993 }
57994
#[simd_test(enable = "avx512f")]
const fn test_kortest_mask16_u8() {
    // `a | b` is all ones here, so the carry-flag result (written through
    // `all_ones`) is 1 while the zero-flag result (the return value) is 0.
    let a: __mmask16 = 0b0110100101101001;
    let b: __mmask16 = 0b1011011010110110;
    let mut all_ones: u8 = 0;
    let r = unsafe { _kortest_mask16_u8(a, b, &mut all_ones) };
    assert_eq!(r, 0);
    assert_eq!(all_ones, 1);
}
58004
58005 #[simd_test(enable = "avx512f")]
58006 const fn test_kortestc_mask16_u8() {
58007 let a: __mmask16 = 0b0110100101101001;
58008 let b: __mmask16 = 0b1011011010110110;
58009 let r = _kortestc_mask16_u8(a, b);
58010 assert_eq!(r, 1);
58011 }
58012
58013 #[simd_test(enable = "avx512f")]
58014 const fn test_kortestz_mask16_u8() {
58015 let a: __mmask16 = 0b0110100101101001;
58016 let b: __mmask16 = 0b1011011010110110;
58017 let r = _kortestz_mask16_u8(a, b);
58018 assert_eq!(r, 0);
58019 }
58020
#[simd_test(enable = "avx512f")]
const fn test_kshiftli_mask16() {
    // Left-shift of a 16-bit mask by an immediate count.
    let a: __mmask16 = 0b1001011011000011;
    let r = _kshiftli_mask16::<3>(a);
    let e: __mmask16 = 0b1011011000011000;
    assert_eq!(r, e);

    // Shifting by 15 keeps only the original bit 0 (now the top bit).
    let r = _kshiftli_mask16::<15>(a);
    let e: __mmask16 = 0b1000000000000000;
    assert_eq!(r, e);

    // Counts >= the mask width must saturate to zero, not wrap.
    let r = _kshiftli_mask16::<16>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);

    // Out-of-range counts are likewise defined to produce zero.
    let r = _kshiftli_mask16::<17>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);
}
58040
#[simd_test(enable = "avx512f")]
const fn test_kshiftri_mask16() {
    // Logical right-shift of a 16-bit mask by an immediate count.
    let a: __mmask16 = 0b1010100100111100;
    let r = _kshiftri_mask16::<3>(a);
    let e: __mmask16 = 0b0001010100100111;
    assert_eq!(r, e);

    // Shifting by 15 keeps only the original top bit (now bit 0).
    let r = _kshiftri_mask16::<15>(a);
    let e: __mmask16 = 0b0000000000000001;
    assert_eq!(r, e);

    // Counts >= the mask width must saturate to zero, not wrap.
    let r = _kshiftri_mask16::<16>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);

    // Out-of-range counts are likewise defined to produce zero.
    let r = _kshiftri_mask16::<17>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);
}
58060
58061 #[simd_test(enable = "avx512f")]
58062 const fn test_load_mask16() {
58063 let a: __mmask16 = 0b1001011011000011;
58064 let r = unsafe { _load_mask16(&a) };
58065 let e: __mmask16 = 0b1001011011000011;
58066 assert_eq!(r, e);
58067 }
58068
58069 #[simd_test(enable = "avx512f")]
58070 const fn test_store_mask16() {
58071 let a: __mmask16 = 0b0110100100111100;
58072 let mut r = 0;
58073 unsafe {
58074 _store_mask16(&mut r, a);
58075 }
58076 let e: __mmask16 = 0b0110100100111100;
58077 assert_eq!(r, e);
58078 }
58079
58080 #[simd_test(enable = "avx512f")]
58081 const fn test_mm512_kmov() {
58082 let a: u16 = 0b11001100_00110011;
58083 let r = _mm512_kmov(a);
58084 let e: u16 = 0b11001100_00110011;
58085 assert_eq!(r, e);
58086 }
58087
58088 #[simd_test(enable = "avx512f")]
58089 const fn test_mm512_int2mask() {
58090 let a: i32 = 0b11001100_00110011;
58091 let r = _mm512_int2mask(a);
58092 let e: u16 = 0b11001100_00110011;
58093 assert_eq!(r, e);
58094 }
58095
58096 #[simd_test(enable = "avx512f")]
58097 const fn test_mm512_mask2int() {
58098 let k1: __mmask16 = 0b11001100_00110011;
58099 let r = _mm512_mask2int(k1);
58100 let e: i32 = 0b11001100_00110011;
58101 assert_eq!(r, e);
58102 }
58103
58104 #[simd_test(enable = "avx512f")]
58105 const fn test_mm512_kunpackb() {
58106 let a: u16 = 0b11001100_00110011;
58107 let b: u16 = 0b00101110_00001011;
58108 let r = _mm512_kunpackb(a, b);
58109 let e: u16 = 0b00110011_00001011;
58110 assert_eq!(r, e);
58111 }
58112
58113 #[simd_test(enable = "avx512f")]
58114 const fn test_mm512_kortestc() {
58115 let a: u16 = 0b11001100_00110011;
58116 let b: u16 = 0b00101110_00001011;
58117 let r = _mm512_kortestc(a, b);
58118 assert_eq!(r, 0);
58119 let b: u16 = 0b11111111_11111111;
58120 let r = _mm512_kortestc(a, b);
58121 assert_eq!(r, 1);
58122 }
58123
58124 #[simd_test(enable = "avx512f")]
58125 const fn test_mm512_kortestz() {
58126 let a: u16 = 0b11001100_00110011;
58127 let b: u16 = 0b00101110_00001011;
58128 let r = _mm512_kortestz(a, b);
58129 assert_eq!(r, 0);
58130 let r = _mm512_kortestz(0, 0);
58131 assert_eq!(r, 1);
58132 }
58133
58134 #[simd_test(enable = "avx512f")]
58135 const fn test_mm512_test_epi32_mask() {
58136 let a = _mm512_set1_epi32(1 << 0);
58137 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
58138 let r = _mm512_test_epi32_mask(a, b);
58139 let e: __mmask16 = 0b11111111_11111111;
58140 assert_eq!(r, e);
58141 }
58142
58143 #[simd_test(enable = "avx512f")]
58144 const fn test_mm512_mask_test_epi32_mask() {
58145 let a = _mm512_set1_epi32(1 << 0);
58146 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
58147 let r = _mm512_mask_test_epi32_mask(0, a, b);
58148 assert_eq!(r, 0);
58149 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
58150 let e: __mmask16 = 0b11111111_11111111;
58151 assert_eq!(r, e);
58152 }
58153
58154 #[simd_test(enable = "avx512f,avx512vl")]
58155 const fn test_mm256_test_epi32_mask() {
58156 let a = _mm256_set1_epi32(1 << 0);
58157 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
58158 let r = _mm256_test_epi32_mask(a, b);
58159 let e: __mmask8 = 0b11111111;
58160 assert_eq!(r, e);
58161 }
58162
58163 #[simd_test(enable = "avx512f,avx512vl")]
58164 const fn test_mm256_mask_test_epi32_mask() {
58165 let a = _mm256_set1_epi32(1 << 0);
58166 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
58167 let r = _mm256_mask_test_epi32_mask(0, a, b);
58168 assert_eq!(r, 0);
58169 let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
58170 let e: __mmask8 = 0b11111111;
58171 assert_eq!(r, e);
58172 }
58173
58174 #[simd_test(enable = "avx512f,avx512vl")]
58175 const fn test_mm_test_epi32_mask() {
58176 let a = _mm_set1_epi32(1 << 0);
58177 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
58178 let r = _mm_test_epi32_mask(a, b);
58179 let e: __mmask8 = 0b00001111;
58180 assert_eq!(r, e);
58181 }
58182
58183 #[simd_test(enable = "avx512f,avx512vl")]
58184 const fn test_mm_mask_test_epi32_mask() {
58185 let a = _mm_set1_epi32(1 << 0);
58186 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
58187 let r = _mm_mask_test_epi32_mask(0, a, b);
58188 assert_eq!(r, 0);
58189 let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
58190 let e: __mmask8 = 0b00001111;
58191 assert_eq!(r, e);
58192 }
58193
58194 #[simd_test(enable = "avx512f")]
58195 const fn test_mm512_testn_epi32_mask() {
58196 let a = _mm512_set1_epi32(1 << 0);
58197 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
58198 let r = _mm512_testn_epi32_mask(a, b);
58199 let e: __mmask16 = 0b00000000_00000000;
58200 assert_eq!(r, e);
58201 }
58202
58203 #[simd_test(enable = "avx512f")]
58204 const fn test_mm512_mask_testn_epi32_mask() {
58205 let a = _mm512_set1_epi32(1 << 0);
58206 let b = _mm512_set1_epi32(1 << 1);
58207 let r = _mm512_mask_test_epi32_mask(0, a, b);
58208 assert_eq!(r, 0);
58209 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
58210 let e: __mmask16 = 0b11111111_11111111;
58211 assert_eq!(r, e);
58212 }
58213
58214 #[simd_test(enable = "avx512f,avx512vl")]
58215 const fn test_mm256_testn_epi32_mask() {
58216 let a = _mm256_set1_epi32(1 << 0);
58217 let b = _mm256_set1_epi32(1 << 1);
58218 let r = _mm256_testn_epi32_mask(a, b);
58219 let e: __mmask8 = 0b11111111;
58220 assert_eq!(r, e);
58221 }
58222
58223 #[simd_test(enable = "avx512f,avx512vl")]
58224 const fn test_mm256_mask_testn_epi32_mask() {
58225 let a = _mm256_set1_epi32(1 << 0);
58226 let b = _mm256_set1_epi32(1 << 1);
58227 let r = _mm256_mask_test_epi32_mask(0, a, b);
58228 assert_eq!(r, 0);
58229 let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
58230 let e: __mmask8 = 0b11111111;
58231 assert_eq!(r, e);
58232 }
58233
58234 #[simd_test(enable = "avx512f,avx512vl")]
58235 const fn test_mm_testn_epi32_mask() {
58236 let a = _mm_set1_epi32(1 << 0);
58237 let b = _mm_set1_epi32(1 << 1);
58238 let r = _mm_testn_epi32_mask(a, b);
58239 let e: __mmask8 = 0b00001111;
58240 assert_eq!(r, e);
58241 }
58242
58243 #[simd_test(enable = "avx512f,avx512vl")]
58244 const fn test_mm_mask_testn_epi32_mask() {
58245 let a = _mm_set1_epi32(1 << 0);
58246 let b = _mm_set1_epi32(1 << 1);
58247 let r = _mm_mask_test_epi32_mask(0, a, b);
58248 assert_eq!(r, 0);
58249 let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
58250 let e: __mmask8 = 0b00001111;
58251 assert_eq!(r, e);
58252 }
58253
58254 #[simd_test(enable = "avx512f")]
58255 #[cfg_attr(miri, ignore)]
58256 fn test_mm512_stream_ps() {
58257 #[repr(align(64))]
58258 struct Memory {
58259 pub data: [f32; 16], // 64 bytes
58260 }
58261 let a = _mm512_set1_ps(7.0);
58262 let mut mem = Memory { data: [-1.0; 16] };
58263
58264 unsafe {
58265 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
58266 }
58267 _mm_sfence();
58268 for i in 0..16 {
58269 assert_eq!(mem.data[i], get_m512(a, i));
58270 }
58271 }
58272
58273 #[simd_test(enable = "avx512f")]
58274 #[cfg_attr(miri, ignore)]
58275 fn test_mm512_stream_pd() {
58276 #[repr(align(64))]
58277 struct Memory {
58278 pub data: [f64; 8],
58279 }
58280 let a = _mm512_set1_pd(7.0);
58281 let mut mem = Memory { data: [-1.0; 8] };
58282
58283 unsafe {
58284 _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
58285 }
58286 _mm_sfence();
58287 for i in 0..8 {
58288 assert_eq!(mem.data[i], get_m512d(a, i));
58289 }
58290 }
58291
58292 #[simd_test(enable = "avx512f")]
58293 #[cfg_attr(miri, ignore)]
58294 fn test_mm512_stream_si512() {
58295 #[repr(align(64))]
58296 struct Memory {
58297 pub data: [i64; 8],
58298 }
58299 let a = _mm512_set1_epi32(7);
58300 let mut mem = Memory { data: [-1; 8] };
58301
58302 unsafe {
58303 _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
58304 }
58305 _mm_sfence();
58306 for i in 0..8 {
58307 assert_eq!(mem.data[i], get_m512i(a, i));
58308 }
58309 }
58310
58311 #[simd_test(enable = "avx512f")]
58312 fn test_mm512_stream_load_si512() {
58313 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
58314 let r = unsafe { _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _) };
58315 assert_eq_m512i(a, r);
58316 }
58317
58318 #[simd_test(enable = "avx512f")]
58319 const fn test_mm512_reduce_add_epi32() {
58320 let a = _mm512_set1_epi32(1);
58321 let e: i32 = _mm512_reduce_add_epi32(a);
58322 assert_eq!(16, e);
58323 }
58324
58325 #[simd_test(enable = "avx512f")]
58326 const fn test_mm512_mask_reduce_add_epi32() {
58327 let a = _mm512_set1_epi32(1);
58328 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
58329 assert_eq!(8, e);
58330 }
58331
58332 #[simd_test(enable = "avx512f")]
58333 const fn test_mm512_reduce_add_ps() {
58334 let a = _mm512_set1_ps(1.);
58335 let e: f32 = _mm512_reduce_add_ps(a);
58336 assert_eq!(16., e);
58337 }
58338
58339 #[simd_test(enable = "avx512f")]
58340 const fn test_mm512_mask_reduce_add_ps() {
58341 let a = _mm512_set1_ps(1.);
58342 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
58343 assert_eq!(8., e);
58344 }
58345
58346 #[simd_test(enable = "avx512f")]
58347 const fn test_mm512_reduce_mul_epi32() {
58348 let a = _mm512_set1_epi32(2);
58349 let e: i32 = _mm512_reduce_mul_epi32(a);
58350 assert_eq!(65536, e);
58351 }
58352
58353 #[simd_test(enable = "avx512f")]
58354 const fn test_mm512_mask_reduce_mul_epi32() {
58355 let a = _mm512_set1_epi32(2);
58356 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
58357 assert_eq!(256, e);
58358 }
58359
58360 #[simd_test(enable = "avx512f")]
58361 const fn test_mm512_reduce_mul_ps() {
58362 let a = _mm512_set1_ps(2.);
58363 let e: f32 = _mm512_reduce_mul_ps(a);
58364 assert_eq!(65536., e);
58365 }
58366
58367 #[simd_test(enable = "avx512f")]
58368 const fn test_mm512_mask_reduce_mul_ps() {
58369 let a = _mm512_set1_ps(2.);
58370 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
58371 assert_eq!(256., e);
58372 }
58373
58374 #[simd_test(enable = "avx512f")]
58375 const fn test_mm512_reduce_max_epi32() {
58376 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58377 let e: i32 = _mm512_reduce_max_epi32(a);
58378 assert_eq!(15, e);
58379 }
58380
58381 #[simd_test(enable = "avx512f")]
58382 const fn test_mm512_mask_reduce_max_epi32() {
58383 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58384 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
58385 assert_eq!(7, e);
58386 }
58387
58388 #[simd_test(enable = "avx512f")]
58389 const fn test_mm512_reduce_max_epu32() {
58390 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58391 let e: u32 = _mm512_reduce_max_epu32(a);
58392 assert_eq!(15, e);
58393 }
58394
58395 #[simd_test(enable = "avx512f")]
58396 const fn test_mm512_mask_reduce_max_epu32() {
58397 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58398 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
58399 assert_eq!(7, e);
58400 }
58401
58402 #[simd_test(enable = "avx512f")]
58403 fn test_mm512_reduce_max_ps() {
58404 let a = _mm512_set_ps(
58405 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58406 );
58407 let e: f32 = _mm512_reduce_max_ps(a);
58408 assert_eq!(15., e);
58409 }
58410
58411 #[simd_test(enable = "avx512f")]
58412 fn test_mm512_mask_reduce_max_ps() {
58413 let a = _mm512_set_ps(
58414 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58415 );
58416 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
58417 assert_eq!(7., e);
58418 }
58419
58420 #[simd_test(enable = "avx512f")]
58421 const fn test_mm512_reduce_min_epi32() {
58422 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58423 let e: i32 = _mm512_reduce_min_epi32(a);
58424 assert_eq!(0, e);
58425 }
58426
58427 #[simd_test(enable = "avx512f")]
58428 const fn test_mm512_mask_reduce_min_epi32() {
58429 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58430 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
58431 assert_eq!(0, e);
58432 }
58433
58434 #[simd_test(enable = "avx512f")]
58435 const fn test_mm512_reduce_min_epu32() {
58436 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58437 let e: u32 = _mm512_reduce_min_epu32(a);
58438 assert_eq!(0, e);
58439 }
58440
58441 #[simd_test(enable = "avx512f")]
58442 const fn test_mm512_mask_reduce_min_epu32() {
58443 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58444 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
58445 assert_eq!(0, e);
58446 }
58447
58448 #[simd_test(enable = "avx512f")]
58449 fn test_mm512_reduce_min_ps() {
58450 let a = _mm512_set_ps(
58451 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58452 );
58453 let e: f32 = _mm512_reduce_min_ps(a);
58454 assert_eq!(0., e);
58455 }
58456
58457 #[simd_test(enable = "avx512f")]
58458 fn test_mm512_mask_reduce_min_ps() {
58459 let a = _mm512_set_ps(
58460 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58461 );
58462 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
58463 assert_eq!(0., e);
58464 }
58465
58466 #[simd_test(enable = "avx512f")]
58467 const fn test_mm512_reduce_and_epi32() {
58468 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58469 let e: i32 = _mm512_reduce_and_epi32(a);
58470 assert_eq!(0, e);
58471 }
58472
58473 #[simd_test(enable = "avx512f")]
58474 const fn test_mm512_mask_reduce_and_epi32() {
58475 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58476 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
58477 assert_eq!(1, e);
58478 }
58479
58480 #[simd_test(enable = "avx512f")]
58481 const fn test_mm512_reduce_or_epi32() {
58482 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58483 let e: i32 = _mm512_reduce_or_epi32(a);
58484 assert_eq!(3, e);
58485 }
58486
58487 #[simd_test(enable = "avx512f")]
58488 const fn test_mm512_mask_reduce_or_epi32() {
58489 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58490 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
58491 assert_eq!(1, e);
58492 }
58493
58494 #[simd_test(enable = "avx512f")]
58495 fn test_mm512_mask_compress_epi32() {
58496 let src = _mm512_set1_epi32(200);
58497 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58498 let r = _mm512_mask_compress_epi32(src, 0, a);
58499 assert_eq_m512i(r, src);
58500 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
58501 let e = _mm512_set_epi32(
58502 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
58503 );
58504 assert_eq_m512i(r, e);
58505 }
58506
58507 #[simd_test(enable = "avx512f")]
58508 fn test_mm512_maskz_compress_epi32() {
58509 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58510 let r = _mm512_maskz_compress_epi32(0, a);
58511 assert_eq_m512i(r, _mm512_setzero_si512());
58512 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
58513 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
58514 assert_eq_m512i(r, e);
58515 }
58516
58517 #[simd_test(enable = "avx512f,avx512vl")]
58518 fn test_mm256_mask_compress_epi32() {
58519 let src = _mm256_set1_epi32(200);
58520 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58521 let r = _mm256_mask_compress_epi32(src, 0, a);
58522 assert_eq_m256i(r, src);
58523 let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
58524 let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
58525 assert_eq_m256i(r, e);
58526 }
58527
58528 #[simd_test(enable = "avx512f,avx512vl")]
58529 fn test_mm256_maskz_compress_epi32() {
58530 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58531 let r = _mm256_maskz_compress_epi32(0, a);
58532 assert_eq_m256i(r, _mm256_setzero_si256());
58533 let r = _mm256_maskz_compress_epi32(0b01010101, a);
58534 let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
58535 assert_eq_m256i(r, e);
58536 }
58537
58538 #[simd_test(enable = "avx512f,avx512vl")]
58539 fn test_mm_mask_compress_epi32() {
58540 let src = _mm_set1_epi32(200);
58541 let a = _mm_set_epi32(0, 1, 2, 3);
58542 let r = _mm_mask_compress_epi32(src, 0, a);
58543 assert_eq_m128i(r, src);
58544 let r = _mm_mask_compress_epi32(src, 0b00000101, a);
58545 let e = _mm_set_epi32(200, 200, 1, 3);
58546 assert_eq_m128i(r, e);
58547 }
58548
58549 #[simd_test(enable = "avx512f,avx512vl")]
58550 fn test_mm_maskz_compress_epi32() {
58551 let a = _mm_set_epi32(0, 1, 2, 3);
58552 let r = _mm_maskz_compress_epi32(0, a);
58553 assert_eq_m128i(r, _mm_setzero_si128());
58554 let r = _mm_maskz_compress_epi32(0b00000101, a);
58555 let e = _mm_set_epi32(0, 0, 1, 3);
58556 assert_eq_m128i(r, e);
58557 }
58558
58559 #[simd_test(enable = "avx512f")]
58560 fn test_mm512_mask_compress_ps() {
58561 let src = _mm512_set1_ps(200.);
58562 let a = _mm512_set_ps(
58563 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58564 );
58565 let r = _mm512_mask_compress_ps(src, 0, a);
58566 assert_eq_m512(r, src);
58567 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
58568 let e = _mm512_set_ps(
58569 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
58570 );
58571 assert_eq_m512(r, e);
58572 }
58573
58574 #[simd_test(enable = "avx512f")]
58575 fn test_mm512_maskz_compress_ps() {
58576 let a = _mm512_set_ps(
58577 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58578 );
58579 let r = _mm512_maskz_compress_ps(0, a);
58580 assert_eq_m512(r, _mm512_setzero_ps());
58581 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
58582 let e = _mm512_set_ps(
58583 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
58584 );
58585 assert_eq_m512(r, e);
58586 }
58587
58588 #[simd_test(enable = "avx512f,avx512vl")]
58589 fn test_mm256_mask_compress_ps() {
58590 let src = _mm256_set1_ps(200.);
58591 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
58592 let r = _mm256_mask_compress_ps(src, 0, a);
58593 assert_eq_m256(r, src);
58594 let r = _mm256_mask_compress_ps(src, 0b01010101, a);
58595 let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
58596 assert_eq_m256(r, e);
58597 }
58598
58599 #[simd_test(enable = "avx512f,avx512vl")]
58600 fn test_mm256_maskz_compress_ps() {
58601 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
58602 let r = _mm256_maskz_compress_ps(0, a);
58603 assert_eq_m256(r, _mm256_setzero_ps());
58604 let r = _mm256_maskz_compress_ps(0b01010101, a);
58605 let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
58606 assert_eq_m256(r, e);
58607 }
58608
58609 #[simd_test(enable = "avx512f,avx512vl")]
58610 fn test_mm_mask_compress_ps() {
58611 let src = _mm_set1_ps(200.);
58612 let a = _mm_set_ps(0., 1., 2., 3.);
58613 let r = _mm_mask_compress_ps(src, 0, a);
58614 assert_eq_m128(r, src);
58615 let r = _mm_mask_compress_ps(src, 0b00000101, a);
58616 let e = _mm_set_ps(200., 200., 1., 3.);
58617 assert_eq_m128(r, e);
58618 }
58619
58620 #[simd_test(enable = "avx512f,avx512vl")]
58621 fn test_mm_maskz_compress_ps() {
58622 let a = _mm_set_ps(0., 1., 2., 3.);
58623 let r = _mm_maskz_compress_ps(0, a);
58624 assert_eq_m128(r, _mm_setzero_ps());
58625 let r = _mm_maskz_compress_ps(0b00000101, a);
58626 let e = _mm_set_ps(0., 0., 1., 3.);
58627 assert_eq_m128(r, e);
58628 }
58629
#[simd_test(enable = "avx512f")]
fn test_mm512_mask_compressstoreu_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let mut r = [0_i32; 16];
    // With a zero mask nothing is stored; the destination stays untouched.
    unsafe {
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
    }
    assert_eq!(&r, &[0_i32; 16]);
    // Mask bits 1,3,6,7,12..15 select lanes with values 2,4,7,8,13..16;
    // compress-store writes them contiguously from the start of `r` and
    // leaves the tail unwritten (still zero).
    unsafe {
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
    }
    assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
}
58643
58644 #[simd_test(enable = "avx512f,avx512vl")]
58645 fn test_mm256_mask_compressstoreu_epi32() {
58646 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
58647 let mut r = [0_i32; 8];
58648 unsafe {
58649 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
58650 }
58651 assert_eq!(&r, &[0_i32; 8]);
58652 unsafe {
58653 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
58654 }
58655 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
58656 }
58657
58658 #[simd_test(enable = "avx512f,avx512vl")]
58659 fn test_mm_mask_compressstoreu_epi32() {
58660 let a = _mm_setr_epi32(1, 2, 3, 4);
58661 let mut r = [0_i32; 4];
58662 unsafe {
58663 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
58664 }
58665 assert_eq!(&r, &[0_i32; 4]);
58666 unsafe {
58667 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
58668 }
58669 assert_eq!(&r, &[1, 2, 4, 0]);
58670 }
58671
58672 #[simd_test(enable = "avx512f")]
58673 fn test_mm512_mask_compressstoreu_epi64() {
58674 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
58675 let mut r = [0_i64; 8];
58676 unsafe {
58677 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
58678 }
58679 assert_eq!(&r, &[0_i64; 8]);
58680 unsafe {
58681 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
58682 }
58683 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
58684 }
58685
58686 #[simd_test(enable = "avx512f,avx512vl")]
58687 fn test_mm256_mask_compressstoreu_epi64() {
58688 let a = _mm256_setr_epi64x(1, 2, 3, 4);
58689 let mut r = [0_i64; 4];
58690 unsafe {
58691 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
58692 }
58693 assert_eq!(&r, &[0_i64; 4]);
58694 unsafe {
58695 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
58696 }
58697 assert_eq!(&r, &[1, 2, 4, 0]);
58698 }
58699
58700 #[simd_test(enable = "avx512f,avx512vl")]
58701 fn test_mm_mask_compressstoreu_epi64() {
58702 let a = _mm_setr_epi64x(1, 2);
58703 let mut r = [0_i64; 2];
58704 unsafe {
58705 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
58706 }
58707 assert_eq!(&r, &[0_i64; 2]);
58708 unsafe {
58709 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
58710 }
58711 assert_eq!(&r, &[2, 0]);
58712 }
58713
58714 #[simd_test(enable = "avx512f")]
58715 fn test_mm512_mask_compressstoreu_ps() {
58716 let a = _mm512_setr_ps(
58717 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
58718 13_f32, 14_f32, 15_f32, 16_f32,
58719 );
58720 let mut r = [0_f32; 16];
58721 unsafe {
58722 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
58723 }
58724 assert_eq!(&r, &[0_f32; 16]);
58725 unsafe {
58726 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
58727 }
58728 assert_eq!(
58729 &r,
58730 &[
58731 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
58732 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
58733 ]
58734 );
58735 }
58736
58737 #[simd_test(enable = "avx512f,avx512vl")]
58738 fn test_mm256_mask_compressstoreu_ps() {
58739 let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
58740 let mut r = [0_f32; 8];
58741 unsafe {
58742 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
58743 }
58744 assert_eq!(&r, &[0_f32; 8]);
58745 unsafe {
58746 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
58747 }
58748 assert_eq!(
58749 &r,
58750 &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
58751 );
58752 }
58753
58754 #[simd_test(enable = "avx512f,avx512vl")]
58755 fn test_mm_mask_compressstoreu_ps() {
58756 let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
58757 let mut r = [0.; 4];
58758 unsafe {
58759 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
58760 }
58761 assert_eq!(&r, &[0.; 4]);
58762 unsafe {
58763 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
58764 }
58765 assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
58766 }
58767
58768 #[simd_test(enable = "avx512f")]
58769 fn test_mm512_mask_compressstoreu_pd() {
58770 let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
58771 let mut r = [0.; 8];
58772 unsafe {
58773 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
58774 }
58775 assert_eq!(&r, &[0.; 8]);
58776 unsafe {
58777 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
58778 }
58779 assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
58780 }
58781
58782 #[simd_test(enable = "avx512f,avx512vl")]
58783 fn test_mm256_mask_compressstoreu_pd() {
58784 let a = _mm256_setr_pd(1., 2., 3., 4.);
58785 let mut r = [0.; 4];
58786 unsafe {
58787 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
58788 }
58789 assert_eq!(&r, &[0.; 4]);
58790 unsafe {
58791 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
58792 }
58793 assert_eq!(&r, &[1., 2., 4., 0.]);
58794 }
58795
58796 #[simd_test(enable = "avx512f,avx512vl")]
58797 fn test_mm_mask_compressstoreu_pd() {
58798 let a = _mm_setr_pd(1., 2.);
58799 let mut r = [0.; 2];
58800 unsafe {
58801 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
58802 }
58803 assert_eq!(&r, &[0.; 2]);
58804 unsafe {
58805 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
58806 }
58807 assert_eq!(&r, &[2., 0.]);
58808 }
58809
58810 #[simd_test(enable = "avx512f")]
58811 fn test_mm512_mask_expand_epi32() {
58812 let src = _mm512_set1_epi32(200);
58813 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58814 let r = _mm512_mask_expand_epi32(src, 0, a);
58815 assert_eq_m512i(r, src);
58816 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
58817 let e = _mm512_set_epi32(
58818 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
58819 );
58820 assert_eq_m512i(r, e);
58821 }
58822
58823 #[simd_test(enable = "avx512f")]
58824 fn test_mm512_maskz_expand_epi32() {
58825 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58826 let r = _mm512_maskz_expand_epi32(0, a);
58827 assert_eq_m512i(r, _mm512_setzero_si512());
58828 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
58829 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
58830 assert_eq_m512i(r, e);
58831 }
58832
58833 #[simd_test(enable = "avx512f,avx512vl")]
58834 fn test_mm256_mask_expand_epi32() {
58835 let src = _mm256_set1_epi32(200);
58836 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58837 let r = _mm256_mask_expand_epi32(src, 0, a);
58838 assert_eq_m256i(r, src);
58839 let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
58840 let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
58841 assert_eq_m256i(r, e);
58842 }
58843
58844 #[simd_test(enable = "avx512f,avx512vl")]
58845 fn test_mm256_maskz_expand_epi32() {
58846 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58847 let r = _mm256_maskz_expand_epi32(0, a);
58848 assert_eq_m256i(r, _mm256_setzero_si256());
58849 let r = _mm256_maskz_expand_epi32(0b01010101, a);
58850 let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
58851 assert_eq_m256i(r, e);
58852 }
58853
58854 #[simd_test(enable = "avx512f,avx512vl")]
58855 fn test_mm_mask_expand_epi32() {
58856 let src = _mm_set1_epi32(200);
58857 let a = _mm_set_epi32(0, 1, 2, 3);
58858 let r = _mm_mask_expand_epi32(src, 0, a);
58859 assert_eq_m128i(r, src);
58860 let r = _mm_mask_expand_epi32(src, 0b00000101, a);
58861 let e = _mm_set_epi32(200, 2, 200, 3);
58862 assert_eq_m128i(r, e);
58863 }
58864
58865 #[simd_test(enable = "avx512f,avx512vl")]
58866 fn test_mm_maskz_expand_epi32() {
58867 let a = _mm_set_epi32(0, 1, 2, 3);
58868 let r = _mm_maskz_expand_epi32(0, a);
58869 assert_eq_m128i(r, _mm_setzero_si128());
58870 let r = _mm_maskz_expand_epi32(0b00000101, a);
58871 let e = _mm_set_epi32(0, 2, 0, 3);
58872 assert_eq_m128i(r, e);
58873 }
58874
58875 #[simd_test(enable = "avx512f")]
58876 fn test_mm512_mask_expand_ps() {
58877 let src = _mm512_set1_ps(200.);
58878 let a = _mm512_set_ps(
58879 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58880 );
58881 let r = _mm512_mask_expand_ps(src, 0, a);
58882 assert_eq_m512(r, src);
58883 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
58884 let e = _mm512_set_ps(
58885 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
58886 );
58887 assert_eq_m512(r, e);
58888 }
58889
58890 #[simd_test(enable = "avx512f")]
58891 fn test_mm512_maskz_expand_ps() {
58892 let a = _mm512_set_ps(
58893 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58894 );
58895 let r = _mm512_maskz_expand_ps(0, a);
58896 assert_eq_m512(r, _mm512_setzero_ps());
58897 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
58898 let e = _mm512_set_ps(
58899 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
58900 );
58901 assert_eq_m512(r, e);
58902 }
58903
58904 #[simd_test(enable = "avx512f,avx512vl")]
58905 fn test_mm256_mask_expand_ps() {
58906 let src = _mm256_set1_ps(200.);
58907 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
58908 let r = _mm256_mask_expand_ps(src, 0, a);
58909 assert_eq_m256(r, src);
58910 let r = _mm256_mask_expand_ps(src, 0b01010101, a);
58911 let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
58912 assert_eq_m256(r, e);
58913 }
58914
    // `_mm256_maskz_expand_ps`: like the mask variant, but unselected lanes
    // are zeroed instead of taken from a source vector.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expand_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_expand_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_expand_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
        assert_eq_m256(r, e);
    }
58924
    // `_mm_mask_expand_ps`: mask 0b0101 expands the two lowest source
    // elements (3., 2.) into lanes 0 and 2; lanes 1 and 3 come from `src`.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expand_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_expand_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_expand_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 2., 200., 3.);
        assert_eq_m128(r, e);
    }
58935
    // `_mm_maskz_expand_ps`: zero mask zeroes all lanes; mask 0b0101 expands
    // the two lowest source elements into lanes 0 and 2, zeroing the rest.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expand_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_expand_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_expand_ps(0b00000101, a);
        let e = _mm_set_ps(0., 2., 0., 3.);
        assert_eq_m128(r, e);
    }
58945
    // `_mm512_loadu_epi32`: unaligned load of 16 i32s; expected vector uses
    // `setr` so it matches the slice's memory order element-for-element.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr();
        let r = unsafe { _mm512_loadu_epi32(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
58954
    // `_mm256_loadu_epi32`: unaligned load of 8 i32s from a plain slice.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
        let p = a.as_ptr();
        let r = unsafe { _mm256_loadu_epi32(black_box(p)) };
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }
58963
    // `_mm_loadu_epi32`: unaligned load of 4 i32s from a plain slice.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_loadu_epi32() {
        let a = &[4, 3, 2, 5];
        let p = a.as_ptr();
        let r = unsafe { _mm_loadu_epi32(black_box(p)) };
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }
58972
    // `_mm512_mask_cvtepi32_storeu_epi16`: truncating 32->16-bit store. The
    // full mask writes all 16 i16s (32 bytes), completely overwriting the
    // undefined destination before it is read.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
        }
        let e = _mm256_set1_epi16(9);
        assert_eq_m256i(r, e);
    }
58983
    // `_mm256_mask_cvtepi32_storeu_epi16`: full mask writes 8 i16s (16 bytes),
    // completely overwriting the undefined 128-bit destination.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(9);
        assert_eq_m128i(r, e);
    }
58994
    // `_mm_mask_cvtepi32_storeu_epi16`: only 4 i16s (8 bytes) are written, so
    // the destination is zero-initialized and its upper half must stay zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_storeu_epi16() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59005
    // `_mm512_mask_cvtsepi32_storeu_epi16`: signed-saturating 32->16-bit
    // store; i32::MAX saturates to i16::MAX in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtsepi32_storeu_epi16(
                &mut r as *mut _ as *mut i16,
                0b11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi16(i16::MAX);
        assert_eq_m256i(r, e);
    }
59020
    // `_mm256_mask_cvtsepi32_storeu_epi16`: signed saturation of i32::MAX to
    // i16::MAX; full mask overwrites all 16 destination bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(i16::MAX);
        assert_eq_m128i(r, e);
    }
59031
    // `_mm_mask_cvtsepi32_storeu_epi16`: writes only the low 8 bytes, so the
    // zero-initialized upper half of the destination must remain zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
59042
    // `_mm512_mask_cvtusepi32_storeu_epi16`: unsigned-saturating store;
    // i32::MAX saturates to u16::MAX (stored bit pattern is -1 as i16).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtusepi32_storeu_epi16(
                &mut r as *mut _ as *mut i16,
                0b11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi16(u16::MAX as i16);
        assert_eq_m256i(r, e);
    }
59057
    // `_mm256_mask_cvtusepi32_storeu_epi16`: unsigned saturation of i32::MAX
    // to u16::MAX; full mask overwrites the whole 128-bit destination.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
59068
    // `_mm_mask_cvtusepi32_storeu_epi16`: 4 saturated u16s land in the low
    // 8 bytes; the zero-initialized upper half must remain untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(
            0,
            0,
            0,
            0,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
        );
        assert_eq_m128i(r, e);
    }
59088
    // `_mm512_mask_cvtepi32_storeu_epi8`: truncating 32->8-bit store; the full
    // mask writes all 16 bytes, overwriting the undefined destination.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(9);
        assert_eq_m128i(r, e);
    }
59099
    // `_mm256_mask_cvtepi32_storeu_epi8`: only 8 bytes written, so the
    // destination is zero-initialized and its upper half is asserted zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59110
    // `_mm_mask_cvtepi32_storeu_epi8`: only 4 bytes written into the low end
    // of the zeroed destination; the remaining 12 bytes must stay zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_storeu_epi8() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59121
    // `_mm512_mask_cvtsepi32_storeu_epi8`: signed-saturating 32->8-bit store;
    // i32::MAX saturates to i8::MAX in all 16 bytes.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }
59132
    // `_mm256_mask_cvtsepi32_storeu_epi8`: 8 saturated bytes in the low half,
    // zero-initialized upper half untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
59149
    // `_mm_mask_cvtsepi32_storeu_epi8`: 4 saturated bytes in the low quarter,
    // the rest of the zeroed destination untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
59166
    // `_mm512_mask_cvtusepi32_storeu_epi8`: unsigned-saturating store;
    // i32::MAX saturates to u8::MAX (byte pattern -1 as i8).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
59177
    // `_mm256_mask_cvtusepi32_storeu_epi8`: 8 unsigned-saturated bytes in the
    // low half; zero-initialized upper half untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
59194
    // `_mm_mask_cvtusepi32_storeu_epi8`: 4 unsigned-saturated bytes in the low
    // quarter; the rest of the zeroed destination untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
59211
    // `_mm512_storeu_epi32`: unaligned store round-trips a full 512-bit
    // vector; the store fully overwrites the undefined destination.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m512i(r, a);
    }
59221
    // `_mm256_storeu_epi32`: unaligned store round-trips a 256-bit vector.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_storeu_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m256i(r, a);
    }
59231
    // `_mm_storeu_epi32`: unaligned store round-trips a 128-bit vector.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_storeu_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m128i(r, a);
    }
59241
    // `_mm512_loadu_si512`: whole-register unaligned load through a cast
    // pointer; expected vector mirrors the slice in memory order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_si512() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr().cast();
        let r = unsafe { _mm512_loadu_si512(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59250
    // `_mm512_storeu_si512`: whole-register unaligned store round-trip.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_si512(&mut r as *mut _, a);
        }
        assert_eq_m512i(r, a);
    }
59260
    // `_mm512_load_si512`: aligned load — the wrapper struct provides the
    // required 64-byte alignment for the source data.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_si512() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr().cast();
        let r = unsafe { _mm512_load_si512(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59275
    // `_mm512_store_si512`: aligned store round-trip (an __m512i local is
    // naturally 64-byte aligned).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_store_si512(&mut r as *mut _, a);
        }
        assert_eq_m512i(r, a);
    }
59285
    // `_mm512_load_epi32`: aligned i32 load from a 64-byte-aligned buffer.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm512_load_epi32(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59300
    // `_mm256_load_epi32`: aligned i32 load (over-aligned to 64 bytes).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 8],
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm256_load_epi32(black_box(p)) };
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }
59315
    // `_mm_load_epi32`: aligned i32 load (over-aligned to 64 bytes).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 4],
        }
        let a = Align { data: [4, 3, 2, 5] };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm_load_epi32(black_box(p)) };
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }
59328
    // `_mm512_store_epi32`: aligned store round-trip into an __m512i local.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m512i(r, a);
    }
59338
    // `_mm256_store_epi32`: aligned store round-trip into an __m256i local.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_store_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m256i(r, a);
    }
59348
    // `_mm_store_epi32`: aligned store round-trip into an __m128i local.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_store_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m128i(r, a);
    }
59358
    // `_mm512_load_ps`: aligned f32 load from a 64-byte-aligned buffer;
    // expected vector uses `setr` to match memory order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
            ],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm512_load_ps(black_box(p)) };
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }
59377
    // `_mm512_store_ps`: aligned f32 store round-trip into an __m512 local.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        unsafe {
            _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
        }
        assert_eq_m512(r, a);
    }
59387
    // `_mm512_mask_set1_epi32`: zero mask keeps `src`; full mask broadcasts
    // `a` into every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_set1_epi32() {
        let src = _mm512_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm512_mask_set1_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }
59398
    // `_mm512_maskz_set1_epi32`: zero mask zeroes everything; full mask
    // broadcasts `a` into every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm512_maskz_set1_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }
59408
    // `_mm256_mask_set1_epi32`: zero mask keeps `src`; full 8-bit mask
    // broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_set1_epi32() {
        let src = _mm256_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm256_mask_set1_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }
59419
    // `_mm256_maskz_set1_epi32`: zero mask zeroes; full mask broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm256_maskz_set1_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi32(0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }
59429
    // `_mm_mask_set1_epi32`: zero mask keeps `src`; mask 0b1111 (all four
    // lanes of a 128-bit vector) broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_set1_epi32() {
        let src = _mm_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm_mask_set1_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }
59440
    // `_mm_maskz_set1_epi32`: zero mask zeroes; mask 0b1111 broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm_maskz_set1_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi32(0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }
59450
    // `_mm_mask_move_ss`: only mask bit 0 matters. Upper three lanes always
    // come from `a`; the lowest lane is `src[0]` (110.) when the bit is clear
    // and `b[0]` (40.) when it is set.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_move_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }
59463
    // `_mm_maskz_move_ss`: lowest lane is zeroed when mask bit 0 is clear,
    // and `b[0]` (40.) when set; upper lanes always come from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_move_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_move_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }
59475
    // `_mm_mask_move_sd`: upper lane always from `a`; lowest lane is `src[0]`
    // (11.) with the mask bit clear and `b[0]` (4.) with it set.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_move_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }
59488
    // `_mm_maskz_move_sd`: lowest lane zeroed when masked off, `b[0]` (4.)
    // when selected; upper lane always from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_move_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_move_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }
59500
    // `_mm_mask_add_ss`: lowest lane is `src[0]` (110.) when masked off and
    // a[0] + b[0] = 20. + 40. = 60. when selected; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_add_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }
59513
59514 #[simd_test(enable = "avx512f")]
59515 const fn test_mm_maskz_add_ss() {
59516 let a = _mm_set_ps(1., 2., 10., 20.);
59517 let b = _mm_set_ps(3., 4., 30., 40.);
59518 let r = _mm_maskz_add_ss(0, a, b);
59519 let e = _mm_set_ps(1., 2., 10., 0.);
59520 assert_eq_m128(r, e);
59521 let r = _mm_maskz_add_ss(0b11111111, a, b);
59522 let e = _mm_set_ps(1., 2., 10., 60.);
59523 assert_eq_m128(r, e);
59524 }
59525
59526 #[simd_test(enable = "avx512f")]
59527 const fn test_mm_mask_add_sd() {
59528 let src = _mm_set_pd(10., 11.);
59529 let a = _mm_set_pd(1., 2.);
59530 let b = _mm_set_pd(3., 4.);
59531 let r = _mm_mask_add_sd(src, 0, a, b);
59532 let e = _mm_set_pd(1., 11.);
59533 assert_eq_m128d(r, e);
59534 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
59535 let e = _mm_set_pd(1., 6.);
59536 assert_eq_m128d(r, e);
59537 }
59538
    // `_mm_maskz_add_sd`: lowest lane zeroed when masked off, a[0] + b[0] =
    // 2. + 4. = 6. when selected; upper lane always from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_add_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_add_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }
59550
    // `_mm_mask_sub_ss`: lowest lane is `src[0]` (110.) when masked off and
    // a[0] - b[0] = 20. - 40. = -20. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }
59563
    // `_mm_maskz_sub_ss`: lowest lane zeroed when masked off, 20. - 40. = -20.
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sub_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }
59575
    // `_mm_mask_sub_sd`: lowest lane is `src[0]` (11.) when masked off and
    // 2. - 4. = -2. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }
59588
    // `_mm_maskz_sub_sd`: lowest lane zeroed when masked off, 2. - 4. = -2.
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sub_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }
59600
    // `_mm_mask_mul_ss`: lowest lane is `src[0]` (110.) when masked off and
    // 20. * 40. = 800. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }
59613
    // `_mm_maskz_mul_ss`: lowest lane zeroed when masked off, 20. * 40. = 800.
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_mul_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_mul_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }
59625
    // `_mm_mask_mul_sd`: lowest lane is `src[0]` (11.) when masked off and
    // 2. * 4. = 8. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
59638
59639 #[simd_test(enable = "avx512f")]
59640 const fn test_mm_maskz_mul_sd() {
59641 let a = _mm_set_pd(1., 2.);
59642 let b = _mm_set_pd(3., 4.);
59643 let r = _mm_maskz_mul_sd(0, a, b);
59644 let e = _mm_set_pd(1., 0.);
59645 assert_eq_m128d(r, e);
59646 let r = _mm_maskz_mul_sd(0b11111111, a, b);
59647 let e = _mm_set_pd(1., 8.);
59648 assert_eq_m128d(r, e);
59649 }
59650
    // `_mm_mask_div_ss`: lowest lane is `src[0]` (110.) when masked off and
    // 20. / 40. = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59663
    // `_mm_maskz_div_ss`: lowest lane zeroed when masked off, 20. / 40. = 0.5
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_div_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_div_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59675
    // `_mm_mask_div_sd`: lowest lane is `src[0]` (11.) when masked off and
    // 2. / 4. = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
59688
59689 #[simd_test(enable = "avx512f")]
59690 const fn test_mm_maskz_div_sd() {
59691 let a = _mm_set_pd(1., 2.);
59692 let b = _mm_set_pd(3., 4.);
59693 let r = _mm_maskz_div_sd(0, a, b);
59694 let e = _mm_set_pd(1., 0.);
59695 assert_eq_m128d(r, e);
59696 let r = _mm_maskz_div_sd(0b11111111, a, b);
59697 let e = _mm_set_pd(1., 0.5);
59698 assert_eq_m128d(r, e);
59699 }
59700
    // `_mm_mask_max_ss`: lowest lane is `a[0]` (3., the src operand here)
    // when masked off and max(3., 7.) = 7. when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
59712
    // `_mm_maskz_max_ss`: lowest lane zeroed when masked off, max(3., 7.) = 7.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
59724
    // `_mm_mask_max_sd`: lowest lane unchanged (src = a) when masked off,
    // max(1., 3.) = 3. when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
59736
    // `_mm_maskz_max_sd`: lowest lane zeroed when masked off, max(1., 3.) = 3.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
59748
    // `_mm_mask_min_ss`: both cases yield 3. in the lowest lane — masked off
    // it comes from src (= a), selected it is min(3., 7.) = 3.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
59760
    // `_mm_maskz_min_ss`: lowest lane zeroed when masked off, min(3., 7.) = 3.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
59772
    // `_mm_mask_min_sd`: both cases yield 1. in the lowest lane — masked off
    // it comes from src (= a), selected it is min(1., 3.) = 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
59784
    // `_mm_maskz_min_sd`: lowest lane zeroed when masked off, min(1., 3.) = 1.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
59796
    // `_mm_mask_sqrt_ss`: lowest lane is `src[0]` (110.) when masked off and
    // sqrt(b[0]) = sqrt(4.) = 2. when selected; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
59809
    // `_mm_maskz_sqrt_ss`: lowest lane zeroed when masked off, sqrt(4.) = 2.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
59821
    // `_mm_mask_sqrt_sd`: lowest lane is `src[0]` (11.) when masked off and
    // sqrt(b[0]) = sqrt(4.) = 2. when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
59834
    // `_mm_maskz_sqrt_sd`: lowest lane zeroed when masked off, sqrt(4.) = 2.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
59846
    // `_mm_rsqrt14_ss`: lowest lane is the approximate reciprocal square root
    // of b[0] (1/sqrt(4.) = 0.5); upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rsqrt14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59855
    // `_mm_mask_rsqrt14_ss`: lowest lane is `src[0]` (110.) when masked off
    // and 1/sqrt(4.) = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rsqrt14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59868
    // `_mm_maskz_rsqrt14_ss`: lowest lane zeroed when masked off,
    // 1/sqrt(4.) = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rsqrt14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59880
59881 #[simd_test(enable = "avx512f")]
59882 fn test_mm_rsqrt14_sd() {
59883 let a = _mm_set_pd(1., 2.);
59884 let b = _mm_set_pd(3., 4.);
59885 let r = _mm_rsqrt14_sd(a, b);
59886 let e = _mm_set_pd(1., 0.5);
59887 assert_eq_m128d(r, e);
59888 }
59889
    // `_mm_mask_rsqrt14_sd`: lowest lane is `src[0]` (11.) when masked off
    // and 1/sqrt(4.) = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rsqrt14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
59902
59903 #[simd_test(enable = "avx512f")]
59904 fn test_mm_maskz_rsqrt14_sd() {
59905 let a = _mm_set_pd(1., 2.);
59906 let b = _mm_set_pd(3., 4.);
59907 let r = _mm_maskz_rsqrt14_sd(0, a, b);
59908 let e = _mm_set_pd(1., 0.);
59909 assert_eq_m128d(r, e);
59910 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
59911 let e = _mm_set_pd(1., 0.5);
59912 assert_eq_m128d(r, e);
59913 }
59914
59915 #[simd_test(enable = "avx512f")]
59916 fn test_mm_rcp14_ss() {
59917 let a = _mm_set_ps(1., 2., 10., 20.);
59918 let b = _mm_set_ps(3., 4., 30., 4.);
59919 let r = _mm_rcp14_ss(a, b);
59920 let e = _mm_set_ps(1., 2., 10., 0.25);
59921 assert_eq_m128(r, e);
59922 }
59923
59924 #[simd_test(enable = "avx512f")]
59925 fn test_mm_mask_rcp14_ss() {
59926 let src = _mm_set_ps(10., 11., 100., 110.);
59927 let a = _mm_set_ps(1., 2., 10., 20.);
59928 let b = _mm_set_ps(3., 4., 30., 4.);
59929 let r = _mm_mask_rcp14_ss(src, 0, a, b);
59930 let e = _mm_set_ps(1., 2., 10., 110.);
59931 assert_eq_m128(r, e);
59932 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
59933 let e = _mm_set_ps(1., 2., 10., 0.25);
59934 assert_eq_m128(r, e);
59935 }
59936
59937 #[simd_test(enable = "avx512f")]
59938 fn test_mm_maskz_rcp14_ss() {
59939 let a = _mm_set_ps(1., 2., 10., 20.);
59940 let b = _mm_set_ps(3., 4., 30., 4.);
59941 let r = _mm_maskz_rcp14_ss(0, a, b);
59942 let e = _mm_set_ps(1., 2., 10., 0.);
59943 assert_eq_m128(r, e);
59944 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
59945 let e = _mm_set_ps(1., 2., 10., 0.25);
59946 assert_eq_m128(r, e);
59947 }
59948
59949 #[simd_test(enable = "avx512f")]
59950 fn test_mm_rcp14_sd() {
59951 let a = _mm_set_pd(1., 2.);
59952 let b = _mm_set_pd(3., 4.);
59953 let r = _mm_rcp14_sd(a, b);
59954 let e = _mm_set_pd(1., 0.25);
59955 assert_eq_m128d(r, e);
59956 }
59957
59958 #[simd_test(enable = "avx512f")]
59959 fn test_mm_mask_rcp14_sd() {
59960 let src = _mm_set_pd(10., 11.);
59961 let a = _mm_set_pd(1., 2.);
59962 let b = _mm_set_pd(3., 4.);
59963 let r = _mm_mask_rcp14_sd(src, 0, a, b);
59964 let e = _mm_set_pd(1., 11.);
59965 assert_eq_m128d(r, e);
59966 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
59967 let e = _mm_set_pd(1., 0.25);
59968 assert_eq_m128d(r, e);
59969 }
59970
59971 #[simd_test(enable = "avx512f")]
59972 fn test_mm_maskz_rcp14_sd() {
59973 let a = _mm_set_pd(1., 2.);
59974 let b = _mm_set_pd(3., 4.);
59975 let r = _mm_maskz_rcp14_sd(0, a, b);
59976 let e = _mm_set_pd(1., 0.);
59977 assert_eq_m128d(r, e);
59978 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
59979 let e = _mm_set_pd(1., 0.25);
59980 assert_eq_m128d(r, e);
59981 }
59982
59983 #[simd_test(enable = "avx512f")]
59984 fn test_mm_getexp_ss() {
59985 let a = _mm_set1_ps(2.);
59986 let b = _mm_set1_ps(3.);
59987 let r = _mm_getexp_ss(a, b);
59988 let e = _mm_set_ps(2., 2., 2., 1.);
59989 assert_eq_m128(r, e);
59990 }
59991
59992 #[simd_test(enable = "avx512f")]
59993 fn test_mm_mask_getexp_ss() {
59994 let a = _mm_set1_ps(2.);
59995 let b = _mm_set1_ps(3.);
59996 let r = _mm_mask_getexp_ss(a, 0, a, b);
59997 let e = _mm_set_ps(2., 2., 2., 2.);
59998 assert_eq_m128(r, e);
59999 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
60000 let e = _mm_set_ps(2., 2., 2., 1.);
60001 assert_eq_m128(r, e);
60002 }
60003
60004 #[simd_test(enable = "avx512f")]
60005 fn test_mm_maskz_getexp_ss() {
60006 let a = _mm_set1_ps(2.);
60007 let b = _mm_set1_ps(3.);
60008 let r = _mm_maskz_getexp_ss(0, a, b);
60009 let e = _mm_set_ps(2., 2., 2., 0.);
60010 assert_eq_m128(r, e);
60011 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
60012 let e = _mm_set_ps(2., 2., 2., 1.);
60013 assert_eq_m128(r, e);
60014 }
60015
60016 #[simd_test(enable = "avx512f")]
60017 fn test_mm_getexp_sd() {
60018 let a = _mm_set1_pd(2.);
60019 let b = _mm_set1_pd(3.);
60020 let r = _mm_getexp_sd(a, b);
60021 let e = _mm_set_pd(2., 1.);
60022 assert_eq_m128d(r, e);
60023 }
60024
60025 #[simd_test(enable = "avx512f")]
60026 fn test_mm_mask_getexp_sd() {
60027 let a = _mm_set1_pd(2.);
60028 let b = _mm_set1_pd(3.);
60029 let r = _mm_mask_getexp_sd(a, 0, a, b);
60030 let e = _mm_set_pd(2., 2.);
60031 assert_eq_m128d(r, e);
60032 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
60033 let e = _mm_set_pd(2., 1.);
60034 assert_eq_m128d(r, e);
60035 }
60036
60037 #[simd_test(enable = "avx512f")]
60038 fn test_mm_maskz_getexp_sd() {
60039 let a = _mm_set1_pd(2.);
60040 let b = _mm_set1_pd(3.);
60041 let r = _mm_maskz_getexp_sd(0, a, b);
60042 let e = _mm_set_pd(2., 0.);
60043 assert_eq_m128d(r, e);
60044 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
60045 let e = _mm_set_pd(2., 1.);
60046 assert_eq_m128d(r, e);
60047 }
60048
60049 #[simd_test(enable = "avx512f")]
60050 fn test_mm_getmant_ss() {
60051 let a = _mm_set1_ps(20.);
60052 let b = _mm_set1_ps(10.);
60053 let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
60054 let e = _mm_set_ps(20., 20., 20., 1.25);
60055 assert_eq_m128(r, e);
60056 }
60057
60058 #[simd_test(enable = "avx512f")]
60059 fn test_mm_mask_getmant_ss() {
60060 let a = _mm_set1_ps(20.);
60061 let b = _mm_set1_ps(10.);
60062 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
60063 let e = _mm_set_ps(20., 20., 20., 20.);
60064 assert_eq_m128(r, e);
60065 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
60066 let e = _mm_set_ps(20., 20., 20., 1.25);
60067 assert_eq_m128(r, e);
60068 }
60069
60070 #[simd_test(enable = "avx512f")]
60071 fn test_mm_maskz_getmant_ss() {
60072 let a = _mm_set1_ps(20.);
60073 let b = _mm_set1_ps(10.);
60074 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
60075 let e = _mm_set_ps(20., 20., 20., 0.);
60076 assert_eq_m128(r, e);
60077 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
60078 let e = _mm_set_ps(20., 20., 20., 1.25);
60079 assert_eq_m128(r, e);
60080 }
60081
60082 #[simd_test(enable = "avx512f")]
60083 fn test_mm_getmant_sd() {
60084 let a = _mm_set1_pd(20.);
60085 let b = _mm_set1_pd(10.);
60086 let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
60087 let e = _mm_set_pd(20., 1.25);
60088 assert_eq_m128d(r, e);
60089 }
60090
60091 #[simd_test(enable = "avx512f")]
60092 fn test_mm_mask_getmant_sd() {
60093 let a = _mm_set1_pd(20.);
60094 let b = _mm_set1_pd(10.);
60095 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
60096 let e = _mm_set_pd(20., 20.);
60097 assert_eq_m128d(r, e);
60098 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
60099 let e = _mm_set_pd(20., 1.25);
60100 assert_eq_m128d(r, e);
60101 }
60102
60103 #[simd_test(enable = "avx512f")]
60104 fn test_mm_maskz_getmant_sd() {
60105 let a = _mm_set1_pd(20.);
60106 let b = _mm_set1_pd(10.);
60107 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
60108 let e = _mm_set_pd(20., 0.);
60109 assert_eq_m128d(r, e);
60110 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
60111 let e = _mm_set_pd(20., 1.25);
60112 assert_eq_m128d(r, e);
60113 }
60114
60115 #[simd_test(enable = "avx512f")]
60116 fn test_mm_roundscale_ss() {
60117 let a = _mm_set1_ps(2.2);
60118 let b = _mm_set1_ps(1.1);
60119 let r = _mm_roundscale_ss::<0>(a, b);
60120 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60121 assert_eq_m128(r, e);
60122 }
60123
60124 #[simd_test(enable = "avx512f")]
60125 fn test_mm_mask_roundscale_ss() {
60126 let a = _mm_set1_ps(2.2);
60127 let b = _mm_set1_ps(1.1);
60128 let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
60129 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
60130 assert_eq_m128(r, e);
60131 let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
60132 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60133 assert_eq_m128(r, e);
60134 }
60135
60136 #[simd_test(enable = "avx512f")]
60137 fn test_mm_maskz_roundscale_ss() {
60138 let a = _mm_set1_ps(2.2);
60139 let b = _mm_set1_ps(1.1);
60140 let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
60141 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
60142 assert_eq_m128(r, e);
60143 let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
60144 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60145 assert_eq_m128(r, e);
60146 }
60147
60148 #[simd_test(enable = "avx512f")]
60149 fn test_mm_roundscale_sd() {
60150 let a = _mm_set1_pd(2.2);
60151 let b = _mm_set1_pd(1.1);
60152 let r = _mm_roundscale_sd::<0>(a, b);
60153 let e = _mm_set_pd(2.2, 1.0);
60154 assert_eq_m128d(r, e);
60155 }
60156
60157 #[simd_test(enable = "avx512f")]
60158 fn test_mm_mask_roundscale_sd() {
60159 let a = _mm_set1_pd(2.2);
60160 let b = _mm_set1_pd(1.1);
60161 let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
60162 let e = _mm_set_pd(2.2, 2.2);
60163 assert_eq_m128d(r, e);
60164 let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
60165 let e = _mm_set_pd(2.2, 1.0);
60166 assert_eq_m128d(r, e);
60167 }
60168
60169 #[simd_test(enable = "avx512f")]
60170 fn test_mm_maskz_roundscale_sd() {
60171 let a = _mm_set1_pd(2.2);
60172 let b = _mm_set1_pd(1.1);
60173 let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
60174 let e = _mm_set_pd(2.2, 0.0);
60175 assert_eq_m128d(r, e);
60176 let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
60177 let e = _mm_set_pd(2.2, 1.0);
60178 assert_eq_m128d(r, e);
60179 }
60180
60181 #[simd_test(enable = "avx512f")]
60182 fn test_mm_scalef_ss() {
60183 let a = _mm_set1_ps(1.);
60184 let b = _mm_set1_ps(3.);
60185 let r = _mm_scalef_ss(a, b);
60186 let e = _mm_set_ps(1., 1., 1., 8.);
60187 assert_eq_m128(r, e);
60188 }
60189
60190 #[simd_test(enable = "avx512f")]
60191 fn test_mm_mask_scalef_ss() {
60192 let a = _mm_set1_ps(1.);
60193 let b = _mm_set1_ps(3.);
60194 let r = _mm_mask_scalef_ss(a, 0, a, b);
60195 let e = _mm_set_ps(1., 1., 1., 1.);
60196 assert_eq_m128(r, e);
60197 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
60198 let e = _mm_set_ps(1., 1., 1., 8.);
60199 assert_eq_m128(r, e);
60200 }
60201
60202 #[simd_test(enable = "avx512f")]
60203 fn test_mm_maskz_scalef_ss() {
60204 let a = _mm_set1_ps(1.);
60205 let b = _mm_set1_ps(3.);
60206 let r = _mm_maskz_scalef_ss(0, a, b);
60207 let e = _mm_set_ps(1., 1., 1., 0.);
60208 assert_eq_m128(r, e);
60209 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
60210 let e = _mm_set_ps(1., 1., 1., 8.);
60211 assert_eq_m128(r, e);
60212 }
60213
60214 #[simd_test(enable = "avx512f")]
60215 fn test_mm_scalef_sd() {
60216 let a = _mm_set1_pd(1.);
60217 let b = _mm_set1_pd(3.);
60218 let r = _mm_scalef_sd(a, b);
60219 let e = _mm_set_pd(1., 8.);
60220 assert_eq_m128d(r, e);
60221 }
60222
60223 #[simd_test(enable = "avx512f")]
60224 fn test_mm_mask_scalef_sd() {
60225 let a = _mm_set1_pd(1.);
60226 let b = _mm_set1_pd(3.);
60227 let r = _mm_mask_scalef_sd(a, 0, a, b);
60228 let e = _mm_set_pd(1., 1.);
60229 assert_eq_m128d(r, e);
60230 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
60231 let e = _mm_set_pd(1., 8.);
60232 assert_eq_m128d(r, e);
60233 }
60234
60235 #[simd_test(enable = "avx512f")]
60236 fn test_mm_maskz_scalef_sd() {
60237 let a = _mm_set1_pd(1.);
60238 let b = _mm_set1_pd(3.);
60239 let r = _mm_maskz_scalef_sd(0, a, b);
60240 let e = _mm_set_pd(1., 0.);
60241 assert_eq_m128d(r, e);
60242 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
60243 let e = _mm_set_pd(1., 8.);
60244 assert_eq_m128d(r, e);
60245 }
60246
60247 #[simd_test(enable = "avx512f")]
60248 const fn test_mm_mask_fmadd_ss() {
60249 let a = _mm_set1_ps(1.);
60250 let b = _mm_set1_ps(2.);
60251 let c = _mm_set1_ps(3.);
60252 let r = _mm_mask_fmadd_ss(a, 0, b, c);
60253 assert_eq_m128(r, a);
60254 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
60255 let e = _mm_set_ps(1., 1., 1., 5.);
60256 assert_eq_m128(r, e);
60257 }
60258
60259 #[simd_test(enable = "avx512f")]
60260 const fn test_mm_maskz_fmadd_ss() {
60261 let a = _mm_set1_ps(1.);
60262 let b = _mm_set1_ps(2.);
60263 let c = _mm_set1_ps(3.);
60264 let r = _mm_maskz_fmadd_ss(0, a, b, c);
60265 let e = _mm_set_ps(1., 1., 1., 0.);
60266 assert_eq_m128(r, e);
60267 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
60268 let e = _mm_set_ps(1., 1., 1., 5.);
60269 assert_eq_m128(r, e);
60270 }
60271
60272 #[simd_test(enable = "avx512f")]
60273 const fn test_mm_mask3_fmadd_ss() {
60274 let a = _mm_set1_ps(1.);
60275 let b = _mm_set1_ps(2.);
60276 let c = _mm_set1_ps(3.);
60277 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
60278 assert_eq_m128(r, c);
60279 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
60280 let e = _mm_set_ps(3., 3., 3., 5.);
60281 assert_eq_m128(r, e);
60282 }
60283
60284 #[simd_test(enable = "avx512f")]
60285 const fn test_mm_mask_fmadd_sd() {
60286 let a = _mm_set1_pd(1.);
60287 let b = _mm_set1_pd(2.);
60288 let c = _mm_set1_pd(3.);
60289 let r = _mm_mask_fmadd_sd(a, 0, b, c);
60290 assert_eq_m128d(r, a);
60291 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
60292 let e = _mm_set_pd(1., 5.);
60293 assert_eq_m128d(r, e);
60294 }
60295
60296 #[simd_test(enable = "avx512f")]
60297 const fn test_mm_maskz_fmadd_sd() {
60298 let a = _mm_set1_pd(1.);
60299 let b = _mm_set1_pd(2.);
60300 let c = _mm_set1_pd(3.);
60301 let r = _mm_maskz_fmadd_sd(0, a, b, c);
60302 let e = _mm_set_pd(1., 0.);
60303 assert_eq_m128d(r, e);
60304 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
60305 let e = _mm_set_pd(1., 5.);
60306 assert_eq_m128d(r, e);
60307 }
60308
60309 #[simd_test(enable = "avx512f")]
60310 const fn test_mm_mask3_fmadd_sd() {
60311 let a = _mm_set1_pd(1.);
60312 let b = _mm_set1_pd(2.);
60313 let c = _mm_set1_pd(3.);
60314 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
60315 assert_eq_m128d(r, c);
60316 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
60317 let e = _mm_set_pd(3., 5.);
60318 assert_eq_m128d(r, e);
60319 }
60320
60321 #[simd_test(enable = "avx512f")]
60322 const fn test_mm_mask_fmsub_ss() {
60323 let a = _mm_set1_ps(1.);
60324 let b = _mm_set1_ps(2.);
60325 let c = _mm_set1_ps(3.);
60326 let r = _mm_mask_fmsub_ss(a, 0, b, c);
60327 assert_eq_m128(r, a);
60328 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
60329 let e = _mm_set_ps(1., 1., 1., -1.);
60330 assert_eq_m128(r, e);
60331 }
60332
60333 #[simd_test(enable = "avx512f")]
60334 const fn test_mm_maskz_fmsub_ss() {
60335 let a = _mm_set1_ps(1.);
60336 let b = _mm_set1_ps(2.);
60337 let c = _mm_set1_ps(3.);
60338 let r = _mm_maskz_fmsub_ss(0, a, b, c);
60339 let e = _mm_set_ps(1., 1., 1., 0.);
60340 assert_eq_m128(r, e);
60341 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
60342 let e = _mm_set_ps(1., 1., 1., -1.);
60343 assert_eq_m128(r, e);
60344 }
60345
60346 #[simd_test(enable = "avx512f")]
60347 const fn test_mm_mask3_fmsub_ss() {
60348 let a = _mm_set1_ps(1.);
60349 let b = _mm_set1_ps(2.);
60350 let c = _mm_set1_ps(3.);
60351 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
60352 assert_eq_m128(r, c);
60353 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
60354 let e = _mm_set_ps(3., 3., 3., -1.);
60355 assert_eq_m128(r, e);
60356 }
60357
60358 #[simd_test(enable = "avx512f")]
60359 const fn test_mm_mask_fmsub_sd() {
60360 let a = _mm_set1_pd(1.);
60361 let b = _mm_set1_pd(2.);
60362 let c = _mm_set1_pd(3.);
60363 let r = _mm_mask_fmsub_sd(a, 0, b, c);
60364 assert_eq_m128d(r, a);
60365 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
60366 let e = _mm_set_pd(1., -1.);
60367 assert_eq_m128d(r, e);
60368 }
60369
60370 #[simd_test(enable = "avx512f")]
60371 const fn test_mm_maskz_fmsub_sd() {
60372 let a = _mm_set1_pd(1.);
60373 let b = _mm_set1_pd(2.);
60374 let c = _mm_set1_pd(3.);
60375 let r = _mm_maskz_fmsub_sd(0, a, b, c);
60376 let e = _mm_set_pd(1., 0.);
60377 assert_eq_m128d(r, e);
60378 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
60379 let e = _mm_set_pd(1., -1.);
60380 assert_eq_m128d(r, e);
60381 }
60382
60383 #[simd_test(enable = "avx512f")]
60384 const fn test_mm_mask3_fmsub_sd() {
60385 let a = _mm_set1_pd(1.);
60386 let b = _mm_set1_pd(2.);
60387 let c = _mm_set1_pd(3.);
60388 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
60389 assert_eq_m128d(r, c);
60390 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
60391 let e = _mm_set_pd(3., -1.);
60392 assert_eq_m128d(r, e);
60393 }
60394
60395 #[simd_test(enable = "avx512f")]
60396 const fn test_mm_mask_fnmadd_ss() {
60397 let a = _mm_set1_ps(1.);
60398 let b = _mm_set1_ps(2.);
60399 let c = _mm_set1_ps(3.);
60400 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
60401 assert_eq_m128(r, a);
60402 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
60403 let e = _mm_set_ps(1., 1., 1., 1.);
60404 assert_eq_m128(r, e);
60405 }
60406
60407 #[simd_test(enable = "avx512f")]
60408 const fn test_mm_maskz_fnmadd_ss() {
60409 let a = _mm_set1_ps(1.);
60410 let b = _mm_set1_ps(2.);
60411 let c = _mm_set1_ps(3.);
60412 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
60413 let e = _mm_set_ps(1., 1., 1., 0.);
60414 assert_eq_m128(r, e);
60415 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
60416 let e = _mm_set_ps(1., 1., 1., 1.);
60417 assert_eq_m128(r, e);
60418 }
60419
60420 #[simd_test(enable = "avx512f")]
60421 const fn test_mm_mask3_fnmadd_ss() {
60422 let a = _mm_set1_ps(1.);
60423 let b = _mm_set1_ps(2.);
60424 let c = _mm_set1_ps(3.);
60425 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
60426 assert_eq_m128(r, c);
60427 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
60428 let e = _mm_set_ps(3., 3., 3., 1.);
60429 assert_eq_m128(r, e);
60430 }
60431
60432 #[simd_test(enable = "avx512f")]
60433 const fn test_mm_mask_fnmadd_sd() {
60434 let a = _mm_set1_pd(1.);
60435 let b = _mm_set1_pd(2.);
60436 let c = _mm_set1_pd(3.);
60437 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
60438 assert_eq_m128d(r, a);
60439 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
60440 let e = _mm_set_pd(1., 1.);
60441 assert_eq_m128d(r, e);
60442 }
60443
60444 #[simd_test(enable = "avx512f")]
60445 const fn test_mm_maskz_fnmadd_sd() {
60446 let a = _mm_set1_pd(1.);
60447 let b = _mm_set1_pd(2.);
60448 let c = _mm_set1_pd(3.);
60449 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
60450 let e = _mm_set_pd(1., 0.);
60451 assert_eq_m128d(r, e);
60452 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
60453 let e = _mm_set_pd(1., 1.);
60454 assert_eq_m128d(r, e);
60455 }
60456
60457 #[simd_test(enable = "avx512f")]
60458 const fn test_mm_mask3_fnmadd_sd() {
60459 let a = _mm_set1_pd(1.);
60460 let b = _mm_set1_pd(2.);
60461 let c = _mm_set1_pd(3.);
60462 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
60463 assert_eq_m128d(r, c);
60464 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
60465 let e = _mm_set_pd(3., 1.);
60466 assert_eq_m128d(r, e);
60467 }
60468
60469 #[simd_test(enable = "avx512f")]
60470 const fn test_mm_mask_fnmsub_ss() {
60471 let a = _mm_set1_ps(1.);
60472 let b = _mm_set1_ps(2.);
60473 let c = _mm_set1_ps(3.);
60474 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
60475 assert_eq_m128(r, a);
60476 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
60477 let e = _mm_set_ps(1., 1., 1., -5.);
60478 assert_eq_m128(r, e);
60479 }
60480
60481 #[simd_test(enable = "avx512f")]
60482 const fn test_mm_maskz_fnmsub_ss() {
60483 let a = _mm_set1_ps(1.);
60484 let b = _mm_set1_ps(2.);
60485 let c = _mm_set1_ps(3.);
60486 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
60487 let e = _mm_set_ps(1., 1., 1., 0.);
60488 assert_eq_m128(r, e);
60489 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
60490 let e = _mm_set_ps(1., 1., 1., -5.);
60491 assert_eq_m128(r, e);
60492 }
60493
60494 #[simd_test(enable = "avx512f")]
60495 const fn test_mm_mask3_fnmsub_ss() {
60496 let a = _mm_set1_ps(1.);
60497 let b = _mm_set1_ps(2.);
60498 let c = _mm_set1_ps(3.);
60499 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
60500 assert_eq_m128(r, c);
60501 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
60502 let e = _mm_set_ps(3., 3., 3., -5.);
60503 assert_eq_m128(r, e);
60504 }
60505
60506 #[simd_test(enable = "avx512f")]
60507 const fn test_mm_mask_fnmsub_sd() {
60508 let a = _mm_set1_pd(1.);
60509 let b = _mm_set1_pd(2.);
60510 let c = _mm_set1_pd(3.);
60511 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
60512 assert_eq_m128d(r, a);
60513 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
60514 let e = _mm_set_pd(1., -5.);
60515 assert_eq_m128d(r, e);
60516 }
60517
60518 #[simd_test(enable = "avx512f")]
60519 const fn test_mm_maskz_fnmsub_sd() {
60520 let a = _mm_set1_pd(1.);
60521 let b = _mm_set1_pd(2.);
60522 let c = _mm_set1_pd(3.);
60523 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
60524 let e = _mm_set_pd(1., 0.);
60525 assert_eq_m128d(r, e);
60526 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
60527 let e = _mm_set_pd(1., -5.);
60528 assert_eq_m128d(r, e);
60529 }
60530
60531 #[simd_test(enable = "avx512f")]
60532 const fn test_mm_mask3_fnmsub_sd() {
60533 let a = _mm_set1_pd(1.);
60534 let b = _mm_set1_pd(2.);
60535 let c = _mm_set1_pd(3.);
60536 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
60537 assert_eq_m128d(r, c);
60538 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
60539 let e = _mm_set_pd(3., -5.);
60540 assert_eq_m128d(r, e);
60541 }
60542
60543 #[simd_test(enable = "avx512f")]
60544 fn test_mm_add_round_ss() {
60545 let a = _mm_set_ps(1., 2., 10., 20.);
60546 let b = _mm_set_ps(3., 4., 30., 40.);
60547 let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60548 let e = _mm_set_ps(1., 2., 10., 60.);
60549 assert_eq_m128(r, e);
60550 }
60551
60552 #[simd_test(enable = "avx512f")]
60553 fn test_mm_mask_add_round_ss() {
60554 let src = _mm_set_ps(10., 11., 100., 110.);
60555 let a = _mm_set_ps(1., 2., 10., 20.);
60556 let b = _mm_set_ps(3., 4., 30., 40.);
60557 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60558 let e = _mm_set_ps(1., 2., 10., 110.);
60559 assert_eq_m128(r, e);
60560 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60561 src, 0b11111111, a, b,
60562 );
60563 let e = _mm_set_ps(1., 2., 10., 60.);
60564 assert_eq_m128(r, e);
60565 }
60566
60567 #[simd_test(enable = "avx512f")]
60568 fn test_mm_maskz_add_round_ss() {
60569 let a = _mm_set_ps(1., 2., 10., 20.);
60570 let b = _mm_set_ps(3., 4., 30., 40.);
60571 let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60572 let e = _mm_set_ps(1., 2., 10., 0.);
60573 assert_eq_m128(r, e);
60574 let r =
60575 _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60576 let e = _mm_set_ps(1., 2., 10., 60.);
60577 assert_eq_m128(r, e);
60578 }
60579
60580 #[simd_test(enable = "avx512f")]
60581 fn test_mm_add_round_sd() {
60582 let a = _mm_set_pd(1., 2.);
60583 let b = _mm_set_pd(3., 4.);
60584 let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60585 let e = _mm_set_pd(1., 6.);
60586 assert_eq_m128d(r, e);
60587 }
60588
60589 #[simd_test(enable = "avx512f")]
60590 fn test_mm_mask_add_round_sd() {
60591 let src = _mm_set_pd(10., 11.);
60592 let a = _mm_set_pd(1., 2.);
60593 let b = _mm_set_pd(3., 4.);
60594 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60595 let e = _mm_set_pd(1., 11.);
60596 assert_eq_m128d(r, e);
60597 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60598 src, 0b11111111, a, b,
60599 );
60600 let e = _mm_set_pd(1., 6.);
60601 assert_eq_m128d(r, e);
60602 }
60603
60604 #[simd_test(enable = "avx512f")]
60605 fn test_mm_maskz_add_round_sd() {
60606 let a = _mm_set_pd(1., 2.);
60607 let b = _mm_set_pd(3., 4.);
60608 let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60609 let e = _mm_set_pd(1., 0.);
60610 assert_eq_m128d(r, e);
60611 let r =
60612 _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60613 let e = _mm_set_pd(1., 6.);
60614 assert_eq_m128d(r, e);
60615 }
60616
60617 #[simd_test(enable = "avx512f")]
60618 fn test_mm_sub_round_ss() {
60619 let a = _mm_set_ps(1., 2., 10., 20.);
60620 let b = _mm_set_ps(3., 4., 30., 40.);
60621 let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60622 let e = _mm_set_ps(1., 2., 10., -20.);
60623 assert_eq_m128(r, e);
60624 }
60625
60626 #[simd_test(enable = "avx512f")]
60627 fn test_mm_mask_sub_round_ss() {
60628 let src = _mm_set_ps(10., 11., 100., 110.);
60629 let a = _mm_set_ps(1., 2., 10., 20.);
60630 let b = _mm_set_ps(3., 4., 30., 40.);
60631 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60632 let e = _mm_set_ps(1., 2., 10., 110.);
60633 assert_eq_m128(r, e);
60634 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60635 src, 0b11111111, a, b,
60636 );
60637 let e = _mm_set_ps(1., 2., 10., -20.);
60638 assert_eq_m128(r, e);
60639 }
60640
60641 #[simd_test(enable = "avx512f")]
60642 fn test_mm_maskz_sub_round_ss() {
60643 let a = _mm_set_ps(1., 2., 10., 20.);
60644 let b = _mm_set_ps(3., 4., 30., 40.);
60645 let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60646 let e = _mm_set_ps(1., 2., 10., 0.);
60647 assert_eq_m128(r, e);
60648 let r =
60649 _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60650 let e = _mm_set_ps(1., 2., 10., -20.);
60651 assert_eq_m128(r, e);
60652 }
60653
60654 #[simd_test(enable = "avx512f")]
60655 fn test_mm_sub_round_sd() {
60656 let a = _mm_set_pd(1., 2.);
60657 let b = _mm_set_pd(3., 4.);
60658 let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60659 let e = _mm_set_pd(1., -2.);
60660 assert_eq_m128d(r, e);
60661 }
60662
60663 #[simd_test(enable = "avx512f")]
60664 fn test_mm_mask_sub_round_sd() {
60665 let src = _mm_set_pd(10., 11.);
60666 let a = _mm_set_pd(1., 2.);
60667 let b = _mm_set_pd(3., 4.);
60668 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60669 let e = _mm_set_pd(1., 11.);
60670 assert_eq_m128d(r, e);
60671 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60672 src, 0b11111111, a, b,
60673 );
60674 let e = _mm_set_pd(1., -2.);
60675 assert_eq_m128d(r, e);
60676 }
60677
60678 #[simd_test(enable = "avx512f")]
60679 fn test_mm_maskz_sub_round_sd() {
60680 let a = _mm_set_pd(1., 2.);
60681 let b = _mm_set_pd(3., 4.);
60682 let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60683 let e = _mm_set_pd(1., 0.);
60684 assert_eq_m128d(r, e);
60685 let r =
60686 _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60687 let e = _mm_set_pd(1., -2.);
60688 assert_eq_m128d(r, e);
60689 }
60690
60691 #[simd_test(enable = "avx512f")]
60692 fn test_mm_mul_round_ss() {
60693 let a = _mm_set_ps(1., 2., 10., 20.);
60694 let b = _mm_set_ps(3., 4., 30., 40.);
60695 let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60696 let e = _mm_set_ps(1., 2., 10., 800.);
60697 assert_eq_m128(r, e);
60698 }
60699
60700 #[simd_test(enable = "avx512f")]
60701 fn test_mm_mask_mul_round_ss() {
60702 let src = _mm_set_ps(10., 11., 100., 110.);
60703 let a = _mm_set_ps(1., 2., 10., 20.);
60704 let b = _mm_set_ps(3., 4., 30., 40.);
60705 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60706 let e = _mm_set_ps(1., 2., 10., 110.);
60707 assert_eq_m128(r, e);
60708 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60709 src, 0b11111111, a, b,
60710 );
60711 let e = _mm_set_ps(1., 2., 10., 800.);
60712 assert_eq_m128(r, e);
60713 }
60714
60715 #[simd_test(enable = "avx512f")]
60716 fn test_mm_maskz_mul_round_ss() {
60717 let a = _mm_set_ps(1., 2., 10., 20.);
60718 let b = _mm_set_ps(3., 4., 30., 40.);
60719 let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60720 let e = _mm_set_ps(1., 2., 10., 0.);
60721 assert_eq_m128(r, e);
60722 let r =
60723 _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60724 let e = _mm_set_ps(1., 2., 10., 800.);
60725 assert_eq_m128(r, e);
60726 }
60727
60728 #[simd_test(enable = "avx512f")]
60729 fn test_mm_mul_round_sd() {
60730 let a = _mm_set_pd(1., 2.);
60731 let b = _mm_set_pd(3., 4.);
60732 let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60733 let e = _mm_set_pd(1., 8.);
60734 assert_eq_m128d(r, e);
60735 }
60736
60737 #[simd_test(enable = "avx512f")]
60738 fn test_mm_mask_mul_round_sd() {
60739 let src = _mm_set_pd(10., 11.);
60740 let a = _mm_set_pd(1., 2.);
60741 let b = _mm_set_pd(3., 4.);
60742 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60743 let e = _mm_set_pd(1., 11.);
60744 assert_eq_m128d(r, e);
60745 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60746 src, 0b11111111, a, b,
60747 );
60748 let e = _mm_set_pd(1., 8.);
60749 assert_eq_m128d(r, e);
60750 }
60751
60752 #[simd_test(enable = "avx512f")]
60753 fn test_mm_maskz_mul_round_sd() {
60754 let a = _mm_set_pd(1., 2.);
60755 let b = _mm_set_pd(3., 4.);
60756 let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60757 let e = _mm_set_pd(1., 0.);
60758 assert_eq_m128d(r, e);
60759 let r =
60760 _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60761 let e = _mm_set_pd(1., 8.);
60762 assert_eq_m128d(r, e);
60763 }
60764
60765 #[simd_test(enable = "avx512f")]
60766 fn test_mm_div_round_ss() {
60767 let a = _mm_set_ps(1., 2., 10., 20.);
60768 let b = _mm_set_ps(3., 4., 30., 40.);
60769 let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60770 let e = _mm_set_ps(1., 2., 10., 0.5);
60771 assert_eq_m128(r, e);
60772 }
60773
60774 #[simd_test(enable = "avx512f")]
60775 fn test_mm_mask_div_round_ss() {
60776 let src = _mm_set_ps(10., 11., 100., 110.);
60777 let a = _mm_set_ps(1., 2., 10., 20.);
60778 let b = _mm_set_ps(3., 4., 30., 40.);
60779 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60780 let e = _mm_set_ps(1., 2., 10., 110.);
60781 assert_eq_m128(r, e);
60782 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60783 src, 0b11111111, a, b,
60784 );
60785 let e = _mm_set_ps(1., 2., 10., 0.5);
60786 assert_eq_m128(r, e);
60787 }
60788
60789 #[simd_test(enable = "avx512f")]
60790 fn test_mm_maskz_div_round_ss() {
60791 let a = _mm_set_ps(1., 2., 10., 20.);
60792 let b = _mm_set_ps(3., 4., 30., 40.);
60793 let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60794 let e = _mm_set_ps(1., 2., 10., 0.);
60795 assert_eq_m128(r, e);
60796 let r =
60797 _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60798 let e = _mm_set_ps(1., 2., 10., 0.5);
60799 assert_eq_m128(r, e);
60800 }
60801
60802 #[simd_test(enable = "avx512f")]
60803 fn test_mm_div_round_sd() {
60804 let a = _mm_set_pd(1., 2.);
60805 let b = _mm_set_pd(3., 4.);
60806 let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60807 let e = _mm_set_pd(1., 0.5);
60808 assert_eq_m128d(r, e);
60809 }
60810
60811 #[simd_test(enable = "avx512f")]
60812 fn test_mm_mask_div_round_sd() {
60813 let src = _mm_set_pd(10., 11.);
60814 let a = _mm_set_pd(1., 2.);
60815 let b = _mm_set_pd(3., 4.);
60816 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60817 let e = _mm_set_pd(1., 11.);
60818 assert_eq_m128d(r, e);
60819 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60820 src, 0b11111111, a, b,
60821 );
60822 let e = _mm_set_pd(1., 0.5);
60823 assert_eq_m128d(r, e);
60824 }
60825
    // Zero-masked scalar double divide: low lane is zeroed when mask bit 0 is
    // clear, and is 2./4. == 0.5 when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
60838
    // Scalar max with SAE: low lane is max(3., 7.) == 7., upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
60847
    // Write-masked scalar max: low lane comes from `src` (here `a`, so 3.) when
    // mask bit 0 is clear, and is max(3., 7.) == 7. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
60859
    // Zero-masked scalar max: low lane is zeroed when mask bit 0 is clear, and
    // is max(3., 7.) == 7. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
60871
    // Scalar double max: low lane is max(1., 3.) == 3., high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
60880
    // Write-masked scalar double max: low lane comes from `src` (here `a`, so
    // 1.) when mask bit 0 is clear, and is max(1., 3.) == 3. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
60892
    // Zero-masked scalar double max: low lane is zeroed when mask bit 0 is
    // clear, and is max(1., 3.) == 3. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
60904
    // Scalar min: low lane is min(3., 7.) == 3., upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
60913
    // Write-masked scalar min: with `src == a` both mask cases yield 3. in the
    // low lane (src copy when clear, min(3., 7.) == 3. when set).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
60925
    // Zero-masked scalar min: low lane is zeroed when mask bit 0 is clear, and
    // is min(3., 7.) == 3. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
60937
    // Scalar double min: low lane is min(1., 3.) == 1., high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
60946
    // Write-masked scalar double min: with `src == a` both mask cases yield 1.
    // in the low lane (src copy when clear, min(1., 3.) == 1. when set).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
60958
    // Zero-masked scalar double min: low lane is zeroed when mask bit 0 is
    // clear, and is min(1., 3.) == 1. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
60970
    // Scalar sqrt with explicit rounding: low lane is sqrt(4.) == 2., upper
    // lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
60979
    // Write-masked scalar sqrt: low lane comes from `src` (110.) when mask bit
    // 0 is clear, and is sqrt(4.) == 2. when set; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
60994
    // Zero-masked scalar sqrt: low lane is zeroed when mask bit 0 is clear, and
    // is sqrt(4.) == 2. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
61007
    // Scalar double sqrt: low lane is sqrt(4.) == 2., high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
61016
    // Write-masked scalar double sqrt: low lane comes from `src` (11.) when
    // mask bit 0 is clear, and is sqrt(4.) == 2. when set; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
61031
    // Zero-masked scalar double sqrt: low lane is zeroed when mask bit 0 is
    // clear, and is sqrt(4.) == 2. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
61044
    // Scalar getexp: low lane is the exponent of 3. (3 = 1.5 * 2^1, so 1.);
    // upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }
61053
    // Write-masked scalar getexp: low lane comes from `src` (here `a`, so 2.)
    // when mask bit 0 is clear, and is getexp(3.) == 1. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }
61065
    // Zero-masked scalar getexp: low lane is zeroed when mask bit 0 is clear,
    // and is getexp(3.) == 1. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }
61077
    // Scalar double getexp: low lane is getexp(3.) == 1. (3 = 1.5 * 2^1); high
    // lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }
61086
    // Write-masked scalar double getexp: low lane comes from `src` (here `a`,
    // so 2.) when mask bit 0 is clear, and is getexp(3.) == 1. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }
61098
    // Zero-masked scalar double getexp: low lane is zeroed when mask bit 0 is
    // clear, and is getexp(3.) == 1. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }
61110
    // Scalar getmant: low lane is the mantissa of 10. normalized to [1, 2)
    // (10 = 1.25 * 2^3, so 1.25); upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r =
            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }
61122
    // Write-masked scalar getmant: low lane comes from `src` (here `a`, so
    // 20.) when mask bit 0 is clear, and is getmant(10.) == 1.25 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }
61142
    // Zero-masked scalar getmant: low lane is zeroed when mask bit 0 is clear,
    // and is getmant(10.) == 1.25 when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }
61162
    // Scalar double getmant: low lane is the mantissa of 10. normalized to
    // [1, 2) (10 = 1.25 * 2^3, so 1.25); high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r =
            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }
61174
    // Write-masked scalar double getmant: low lane comes from `src` (here `a`,
    // so 20.) when mask bit 0 is clear, and is getmant(10.) == 1.25 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }
61194
    // Zero-masked scalar double getmant: low lane is zeroed when mask bit 0 is
    // clear, and is getmant(10.) == 1.25 when set; high lane copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }
61214
    // Scalar roundscale with IMM8 == 0 (round to integral): low lane is
    // 1.1 rounded to 1.0; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }
61223
    // Write-masked scalar roundscale: low lane comes from `src` (here `a`, so
    // 2.2) when mask bit 0 is clear, and is roundscale(1.1) == 1.0 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }
61235
    // Zero-masked scalar roundscale: low lane is zeroed when mask bit 0 is
    // clear, and is roundscale(1.1) == 1.0 when set; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }
61247
    // Scalar double roundscale with IMM8 == 0: low lane is 1.1 rounded to 1.0;
    // high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }
61256
    // Write-masked scalar double roundscale: low lane comes from `src` (here
    // `a`, so 2.2) when mask bit 0 is clear, and is 1.0 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }
61268
    // Zero-masked scalar double roundscale: low lane is zeroed when mask bit 0
    // is clear, and is roundscale(1.1) == 1.0 when set; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }
61280
    // Scalar scalef: low lane is 1. * 2^3 == 8.; upper lanes copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }
61289
    // Write-masked scalar scalef: low lane comes from `src` (here `a`, so 1.)
    // when mask bit 0 is clear, and is 1. * 2^3 == 8. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }
61305
    // Zero-masked scalar scalef: low lane is zeroed when mask bit 0 is clear,
    // and is 1. * 2^3 == 8. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r =
            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }
61320
    // Scalar double scalef: low lane is 1. * 2^3 == 8.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
61329
    // Write-masked scalar double scalef: low lane comes from `src` (here `a`,
    // so 1.) when mask bit 0 is clear, and is 1. * 2^3 == 8. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
61345
    // Zero-masked scalar double scalef: low lane is zeroed when mask bit 0 is
    // clear, and is 1. * 2^3 == 8. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r =
            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
61360
    // Scalar fused multiply-add: low lane is 1.*2. + 3. == 5.; upper lanes are
    // copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
61370
    // Write-masked scalar FMA: when mask bit 0 is clear the result equals `a`;
    // when set the low lane is 1.*2. + 3. == 5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
61386
    // Zero-masked scalar FMA: low lane is zeroed when mask bit 0 is clear, and
    // is 1.*2. + 3. == 5. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
61403
    // mask3 scalar FMA (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is 1.*2. + 3. == 5. and upper lanes are
    // taken from `c` (3.), not `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }
61419
    // Scalar double FMA: low lane is 1.*2. + 3. == 5.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
61429
    // Write-masked scalar double FMA: result equals `a` when mask bit 0 is
    // clear; when set the low lane is 1.*2. + 3. == 5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
61445
    // Zero-masked scalar double FMA: low lane is zeroed when mask bit 0 is
    // clear, and is 1.*2. + 3. == 5. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
61462
    // mask3 scalar double FMA (c is src/dst): result equals `c` when mask bit
    // 0 is clear; when set the low lane is 5. and the high lane is `c`'s (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }
61478
    // Scalar fused multiply-subtract: low lane is 1.*2. - 3. == -1.; upper
    // lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
61488
    // Write-masked scalar FMS: result equals `a` when mask bit 0 is clear;
    // when set the low lane is 1.*2. - 3. == -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
61504
    // Zero-masked scalar FMS: low lane is zeroed when mask bit 0 is clear, and
    // is 1.*2. - 3. == -1. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
61521
    // mask3 scalar FMS (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is -1. and upper lanes come from `c` (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }
61537
    // Scalar double FMS: low lane is 1.*2. - 3. == -1.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
61547
    // Write-masked scalar double FMS: result equals `a` when mask bit 0 is
    // clear; when set the low lane is 1.*2. - 3. == -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
61563
    // Zero-masked scalar double FMS: low lane is zeroed when mask bit 0 is
    // clear, and is 1.*2. - 3. == -1. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
61580
    // mask3 scalar double FMS (c is src/dst): result equals `c` when mask bit
    // 0 is clear; when set the low lane is -1. and the high lane is `c`'s (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }
61596
    // Scalar fused negated multiply-add: low lane is -(1.*2.) + 3. == 1.;
    // upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
61606
    // Write-masked scalar FNMA: result equals `a` when mask bit 0 is clear;
    // when set the low lane is -(1.*2.) + 3. == 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
61622
    // Zero-masked scalar FNMA: low lane is zeroed when mask bit 0 is clear,
    // and is -(1.*2.) + 3. == 1. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
61639
    // mask3 scalar FNMA (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is 1. and upper lanes come from `c` (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }
61655
    // Scalar double FNMA: low lane is -(1.*2.) + 3. == 1.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
61665
    // Write-masked scalar double FNMA: result equals `a` when mask bit 0 is
    // clear; when set the low lane is -(1.*2.) + 3. == 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
61681
    // Zero-masked scalar double FNMA: low lane is zeroed when mask bit 0 is
    // clear, and is -(1.*2.) + 3. == 1. when set; high lane copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
61698
    // mask3 scalar double FNMA (c is src/dst): result equals `c` when mask bit
    // 0 is clear; when set the low lane is 1. and the high lane is `c`'s (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }
61714
    // Scalar fused negated multiply-subtract: low lane is -(1.*2.) - 3. == -5.;
    // upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
61724
    // Write-masked scalar FNMS: result equals `a` when mask bit 0 is clear;
    // when set the low lane is -(1.*2.) - 3. == -5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
61740
    // Zero-masked scalar FNMS: low lane is zeroed when mask bit 0 is clear,
    // and is -(1.*2.) - 3. == -5. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
61757
    // mask3 scalar FNMS (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is -5. and upper lanes come from `c` (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }
61773
61774 #[simd_test(enable = "avx512f")]
61775 fn test_mm_fnmsub_round_sd() {
61776 let a = _mm_set1_pd(1.);
61777 let b = _mm_set1_pd(2.);
61778 let c = _mm_set1_pd(3.);
61779 let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61780 let e = _mm_set_pd(1., -5.);
61781 assert_eq_m128d(r, e);
61782 }
61783
    // Writemask merge (f64): k = 0 keeps `a`; low bit set computes -(1*2) - 3 = -5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    // Zeromask (f64): k = 0 zeroes the low lane; high lane always from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    // mask3 variant (f64): passthrough operand is `c`, so k = 0 yields `c`
    // and the high lane of the result comes from `c`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }
61832
    // vfixupimm with imm8 = 5: the NaN in the low lane of `a` is classified and,
    // with the table word from `c` (i32::MAX), mapped to -0.0 — the value the
    // assertions below pin down. Upper lanes pass through from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_ss::<5>(a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Writemask variant with the mask bit set behaves like the unmasked form.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Zeromask variant: k = 0 zeroes the low lane; k set reproduces -0.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // f64 counterpart of test_mm_fixupimm_ss.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_sd::<5>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 writemask variant with the mask bit set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 zeromask variant: low lane is zeroed when k = 0, fixed up when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }
61898
    // Same fixupimm checks as above, via the explicit-rounding (SAE) entry points
    // with _MM_FROUND_CUR_DIRECTION. Note `a` here has 1. in the top lane to
    // also confirm upper lanes pass through unchanged.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_round_ss() {
        let a = _mm_set_ps(1., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_ps(1., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Writemask rounding variant with all mask bits set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Zeromask rounding variant: zeroed when k = 0, fixed up (-0.0) when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // f64 rounding variant of the fixupimm check.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 writemask rounding variant with all mask bits set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 zeromask rounding variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }
61964
    // Masked f32→f64 low-lane convert: k = 0 keeps the passthrough (`a` doubles);
    // k set converts b's low f32 (-1.5) into the low f64 lane, high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvtss_sd(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Zeromask f32→f64: low lane zeroed when k = 0, converted when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvtss_sd(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Masked f64→f32 low-lane convert: b's low f64 (-7.5) lands in the low f32
    // lane when k is set; the three upper f32 lanes come from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    // Zeromask f64→f32: low lane zeroed when k = 0, converted when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvtsd_ss(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }
62010
    // Explicit-rounding versions of the scalar converts. -1.5 and -7.5 are
    // exactly representable in both widths, so the rounding mode cannot change
    // the result here; the tests check lane placement and masking.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Writemask: k = 0 returns the passthrough `a`; k set converts the low lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Zeromask: low lane zeroed when k = 0, converted when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // f64→f32 with round-toward-zero + suppress-all-exceptions.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    // Writemask f64→f32 rounding variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    // Zeromask f64→f32 rounding variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }
62078
    // Scalar float→int conversions. With TO_ZERO, -1.5 truncates to -1; with the
    // default rounding (cvtss_i32 / cvtsd_i32), -1.5 rounds to nearest-even = -2.
    // The unsigned variants assert u32::MAX for a negative source (the
    // out-of-range result these tests expect from the hardware conversion).
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Alias of the si32 form; same truncating expectation.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f32 → u32 conversion: expected result is u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // Current-direction rounding (default nearest-even): -1.5 → -2.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    // Negative f32 → u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // f64 counterpart: truncating -1.5 gives -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Alias of the si32 form for f64.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f64 → u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // Default rounding (nearest-even): -1.5 → -2.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    // Negative f64 → u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }
62158
    // Integer → scalar-float conversions: 9 is exactly representable, so the
    // rounding mode is irrelevant; the tests check low-lane placement with the
    // upper lanes passed through from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // si32 alias of the same conversion.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // Unsigned source variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // `const fn` test: also exercised in const evaluation by the harness.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvti32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // f64 destination: low lane becomes 9.0, high lane from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvti32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }
62203
    // Truncating (cvtt*) conversions always round toward zero, so -1.5 → -1
    // in every signed case below; unsigned variants expect u32::MAX for a
    // negative source, as asserted.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // i32 alias of the si32 form.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f32 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // Non-round-controlled truncation: -1.5 → -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f32 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // f64 truncation with SAE: -1.5 → -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // i32 alias of the si32 form for f64.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f64 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // f64 truncation without round control: -1.5 → -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f64 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }
62283
    // Unsigned 32-bit → scalar float: 9 converts exactly into the low lane,
    // upper lanes passed through from `a`. Declared `const fn` so the harness
    // can also evaluate it at compile time.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvtu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // f64 destination variant of the unsigned conversion.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvtu32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }
62301
    // Ordered compare with imm8 predicate 0 (EQ): 2.2 == 1.1 is false, so the
    // intrinsic returns 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm_comi_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    // Same EQ comparison for the f64 low lanes.
    #[simd_test(enable = "avx512f")]
    fn test_mm_comi_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }
62319
    // Low-element extraction from 512-bit vectors; `setr` places its first
    // argument in element 0, which is what each extractor must return.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtsi512_si32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtsi512_si32(a);
        let e: i32 = 1;
        assert_eq!(r, e);
    }

    // Lowest f32 lane extraction.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtss_f32() {
        let a = _mm512_setr_ps(
            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
    }

    // Lowest f64 lane extraction.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtsd_f64() {
        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
        assert_eq!(r, -1.1);
    }
62341
    // vshufpd with control 0b11_11_11_11: within each 128-bit pair, select the
    // high element of `a` then the high element of `b`, giving (4,3,8,7,...).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    // Writemask: k = 0 keeps `a`; all bits set reproduces the shuffle.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    // Zeromask: lower half of the mask keeps shuffled lanes, upper half zeroes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }
62372
    // vpexpandd: consecutive memory elements are expanded into the lanes whose
    // mask bit is set (lowest set bit gets a[0], next set bit a[1], ...);
    // unset lanes take `src` (mask variant) or zero (maskz variant).
    // Mask bits beyond the vector's lane count are ignored by the intrinsic.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_expandloadu_epi32(src, m, black_box(p)) };
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    // Zeroing form of the 512-bit i32 expand-load.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_expandloadu_epi32(m, black_box(p)) };
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    // 256-bit merge form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_epi32(src, m, black_box(p)) };
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_epi32(m, black_box(p)) };
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit merge form; only the low 4 mask bits are meaningful.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = unsafe { _mm_mask_expandloadu_epi32(src, m, black_box(p)) };
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = unsafe { _mm_maskz_expandloadu_epi32(m, black_box(p)) };
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // vpexpandq (i64 lanes), 512-bit merge form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_mask_expandloadu_epi64(src, m, black_box(p)) };
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    // i64 512-bit zeroing form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_maskz_expandloadu_epi64(m, black_box(p)) };
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // i64 256-bit merge form (low 4 mask bits used).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_epi64(src, m, black_box(p)) };
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    // i64 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_epi64(m, black_box(p)) };
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // i64 128-bit merge form: low 2 mask bits are both clear here, so the
    // whole result is the `src` passthrough and nothing is loaded.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_mask_expandloadu_epi64(src, m, black_box(p)) };
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    // i64 128-bit zeroing form: all relevant mask bits clear → all zeros.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_maskz_expandloadu_epi64(m, black_box(p)) };
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }
62498
    // vexpandps: same expand-load semantics as the integer forms above, for f32
    // lanes — consecutive memory values fill the masked lanes in ascending order,
    // unset lanes take `src` (mask) or zero (maskz).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_expandloadu_ps(src, m, black_box(p)) };
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    // f32 512-bit zeroing form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_expandloadu_ps(m, black_box(p)) };
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    // f32 256-bit merge form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_ps(src, m, black_box(p)) };
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    // f32 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_ps(m, black_box(p)) };
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    // f32 128-bit merge form: only mask bit 3 (of the low four) is set,
    // so a[0] lands in the top lane and the rest keep `src`.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_mask_expandloadu_ps(src, m, black_box(p)) };
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    // f32 128-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_maskz_expandloadu_ps(m, black_box(p)) };
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    // vexpandpd (f64 lanes), 512-bit merge form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_mask_expandloadu_pd(src, m, black_box(p)) };
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    // f64 512-bit zeroing form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_maskz_expandloadu_pd(m, black_box(p)) };
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    // f64 256-bit merge form (low 4 mask bits used).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_pd(src, m, black_box(p)) };
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    // f64 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_pd(m, black_box(p)) };
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    // f64 128-bit merge form: low 2 mask bits are clear, so no loads happen
    // and the passthrough `src` is returned unchanged.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_mask_expandloadu_pd(src, m, black_box(p)) };
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    // f64 128-bit zeroing form: all relevant mask bits clear → all zeros.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_maskz_expandloadu_pd(m, black_box(p)) };
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
62632}
62633