1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7    core_arch::{simd::*, x86::*},
8    intrinsics::simd::*,
9    intrinsics::sqrtf64,
10    mem, ptr,
11};
12
/// Forwards to the `pause` binding.
///
/// Unlike the neighbouring intrinsics this function carries no
/// `#[target_feature]` attribute; the `pause` instruction assertion is only
/// compiled for test builds that already have `sse2` enabled.
///
/// # Safety
///
/// NOTE(review): the function is declared `unsafe`, but the visible code does
/// not show what the caller must uphold — confirm against the `pause` binding.
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    pause()
}
27
/// Forwards the pointer `p` to the `clflush` binding.
///
/// # Safety
///
/// NOTE(review): the requirements on `p` are set by the `clflush` binding,
/// which is declared outside this view — confirm before relying on them.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}
39
/// Forwards to the `lfence` binding.
///
/// # Safety
///
/// NOTE(review): the function is declared `unsafe`; the visible code takes no
/// arguments and does not show any caller obligation — confirm against the
/// `lfence` binding.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}
55
/// Forwards to the `mfence` binding.
///
/// # Safety
///
/// NOTE(review): the function is declared `unsafe`; the visible code takes no
/// arguments and does not show any caller obligation — confirm against the
/// `mfence` binding.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168    unsafe {
169        let a = simd_cast::<_, u16x16>(a.as_u8x16());
170        let b = simd_cast::<_, u16x16>(b.as_u8x16());
171        let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172        transmute(simd_cast::<_, u8x16>(r))
173    }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184    unsafe {
185        let a = simd_cast::<_, u32x8>(a.as_u16x8());
186        let b = simd_cast::<_, u32x8>(b.as_u16x8());
187        let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188        transmute(simd_cast::<_, u16x8>(r))
189    }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
212#[target_feature(enable = "sse2")]
213#[cfg_attr(test, assert_instr(pmaxsw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
216    unsafe {
217        let a = a.as_i16x8();
218        let b = b.as_i16x8();
219        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
220    }
221}
222
223#[inline]
228#[target_feature(enable = "sse2")]
229#[cfg_attr(test, assert_instr(pmaxub))]
230#[stable(feature = "simd_x86", since = "1.27.0")]
231pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
232    unsafe {
233        let a = a.as_u8x16();
234        let b = b.as_u8x16();
235        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
236    }
237}
238
239#[inline]
244#[target_feature(enable = "sse2")]
245#[cfg_attr(test, assert_instr(pminsw))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
248    unsafe {
249        let a = a.as_i16x8();
250        let b = b.as_i16x8();
251        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
252    }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminub))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
264    unsafe {
265        let a = a.as_u8x16();
266        let b = b.as_u8x16();
267        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
268    }
269}
270
271#[inline]
278#[target_feature(enable = "sse2")]
279#[cfg_attr(test, assert_instr(pmulhw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
282    unsafe {
283        let a = simd_cast::<_, i32x8>(a.as_i16x8());
284        let b = simd_cast::<_, i32x8>(b.as_i16x8());
285        let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
286        transmute(simd_cast::<i32x8, i16x8>(r))
287    }
288}
289
290#[inline]
297#[target_feature(enable = "sse2")]
298#[cfg_attr(test, assert_instr(pmulhuw))]
299#[stable(feature = "simd_x86", since = "1.27.0")]
300pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
301    unsafe {
302        let a = simd_cast::<_, u32x8>(a.as_u16x8());
303        let b = simd_cast::<_, u32x8>(b.as_u16x8());
304        let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
305        transmute(simd_cast::<u32x8, u16x8>(r))
306    }
307}
308
309#[inline]
316#[target_feature(enable = "sse2")]
317#[cfg_attr(test, assert_instr(pmullw))]
318#[stable(feature = "simd_x86", since = "1.27.0")]
319pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
320    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
321}
322
323#[inline]
330#[target_feature(enable = "sse2")]
331#[cfg_attr(test, assert_instr(pmuludq))]
332#[stable(feature = "simd_x86", since = "1.27.0")]
333pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
334    unsafe {
335        let a = a.as_u64x2();
336        let b = b.as_u64x2();
337        let mask = u64x2::splat(u32::MAX.into());
338        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
339    }
340}
341
342#[inline]
351#[target_feature(enable = "sse2")]
352#[cfg_attr(test, assert_instr(psadbw))]
353#[stable(feature = "simd_x86", since = "1.27.0")]
354pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
355    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
356}
357
358#[inline]
362#[target_feature(enable = "sse2")]
363#[cfg_attr(test, assert_instr(psubb))]
364#[stable(feature = "simd_x86", since = "1.27.0")]
365pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
366    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
367}
368
369#[inline]
373#[target_feature(enable = "sse2")]
374#[cfg_attr(test, assert_instr(psubw))]
375#[stable(feature = "simd_x86", since = "1.27.0")]
376pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
377    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
378}
379
380#[inline]
384#[target_feature(enable = "sse2")]
385#[cfg_attr(test, assert_instr(psubd))]
386#[stable(feature = "simd_x86", since = "1.27.0")]
387pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
388    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
389}
390
391#[inline]
395#[target_feature(enable = "sse2")]
396#[cfg_attr(test, assert_instr(psubq))]
397#[stable(feature = "simd_x86", since = "1.27.0")]
398pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
399    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
400}
401
402#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubsb))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
411    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
412}
413
414#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsw))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
423    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubusb))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
435    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
436}
437
438#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusw))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
447    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
448}
449
/// Byte-wise left shift of `a` by the constant `IMM8`, delegated to
/// `_mm_slli_si128_impl`.
///
/// `IMM8` is checked at compile time by `static_assert_uimm_bits!(IMM8, 8)`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}
462
463#[inline]
466#[target_feature(enable = "sse2")]
467unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
468    const fn mask(shift: i32, i: u32) -> u32 {
469        let shift = shift as u32 & 0xff;
470        if shift > 15 { i } else { 16 - shift + i }
471    }
472    transmute::<i8x16, _>(simd_shuffle!(
473        i8x16::ZERO,
474        a.as_i8x16(),
475        [
476            mask(IMM8, 0),
477            mask(IMM8, 1),
478            mask(IMM8, 2),
479            mask(IMM8, 3),
480            mask(IMM8, 4),
481            mask(IMM8, 5),
482            mask(IMM8, 6),
483            mask(IMM8, 7),
484            mask(IMM8, 8),
485            mask(IMM8, 9),
486            mask(IMM8, 10),
487            mask(IMM8, 11),
488            mask(IMM8, 12),
489            mask(IMM8, 13),
490            mask(IMM8, 14),
491            mask(IMM8, 15),
492        ],
493    ))
494}
495
496#[inline]
500#[target_feature(enable = "sse2")]
501#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
502#[rustc_legacy_const_generics(1)]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
505    unsafe {
506        static_assert_uimm_bits!(IMM8, 8);
507        _mm_slli_si128_impl::<IMM8>(a)
508    }
509}
510
511#[inline]
515#[target_feature(enable = "sse2")]
516#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
517#[rustc_legacy_const_generics(1)]
518#[stable(feature = "simd_x86", since = "1.27.0")]
519pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
520    unsafe {
521        static_assert_uimm_bits!(IMM8, 8);
522        _mm_srli_si128_impl::<IMM8>(a)
523    }
524}
525
526#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
532#[rustc_legacy_const_generics(1)]
533#[stable(feature = "simd_x86", since = "1.27.0")]
534pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
535    static_assert_uimm_bits!(IMM8, 8);
536    unsafe {
537        if IMM8 >= 16 {
538            _mm_setzero_si128()
539        } else {
540            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
541        }
542    }
543}
544
545#[inline]
550#[target_feature(enable = "sse2")]
551#[cfg_attr(test, assert_instr(psllw))]
552#[stable(feature = "simd_x86", since = "1.27.0")]
553pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
554    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
555}
556
557#[inline]
561#[target_feature(enable = "sse2")]
562#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
563#[rustc_legacy_const_generics(1)]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
566    static_assert_uimm_bits!(IMM8, 8);
567    unsafe {
568        if IMM8 >= 32 {
569            _mm_setzero_si128()
570        } else {
571            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
572        }
573    }
574}
575
576#[inline]
581#[target_feature(enable = "sse2")]
582#[cfg_attr(test, assert_instr(pslld))]
583#[stable(feature = "simd_x86", since = "1.27.0")]
584pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
585    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
586}
587
588#[inline]
592#[target_feature(enable = "sse2")]
593#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
594#[rustc_legacy_const_generics(1)]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
597    static_assert_uimm_bits!(IMM8, 8);
598    unsafe {
599        if IMM8 >= 64 {
600            _mm_setzero_si128()
601        } else {
602            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
603        }
604    }
605}
606
607#[inline]
612#[target_feature(enable = "sse2")]
613#[cfg_attr(test, assert_instr(psllq))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
616    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
617}
618
619#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
626#[rustc_legacy_const_generics(1)]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
629    static_assert_uimm_bits!(IMM8, 8);
630    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
631}
632
633#[inline]
638#[target_feature(enable = "sse2")]
639#[cfg_attr(test, assert_instr(psraw))]
640#[stable(feature = "simd_x86", since = "1.27.0")]
641pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
642    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
643}
644
645#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
652#[rustc_legacy_const_generics(1)]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
655    static_assert_uimm_bits!(IMM8, 8);
656    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
657}
658
659#[inline]
664#[target_feature(enable = "sse2")]
665#[cfg_attr(test, assert_instr(psrad))]
666#[stable(feature = "simd_x86", since = "1.27.0")]
667pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
668    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
669}
670
/// Byte-wise right shift of `a` by the constant `IMM8`, delegated to
/// `_mm_srli_si128_impl`.
///
/// `IMM8` is checked at compile time by `static_assert_uimm_bits!(IMM8, 8)`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}
683
684#[inline]
687#[target_feature(enable = "sse2")]
688unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
689    const fn mask(shift: i32, i: u32) -> u32 {
690        if (shift as u32) > 15 {
691            i + 16
692        } else {
693            i + (shift as u32)
694        }
695    }
696    let x: i8x16 = simd_shuffle!(
697        a.as_i8x16(),
698        i8x16::ZERO,
699        [
700            mask(IMM8, 0),
701            mask(IMM8, 1),
702            mask(IMM8, 2),
703            mask(IMM8, 3),
704            mask(IMM8, 4),
705            mask(IMM8, 5),
706            mask(IMM8, 6),
707            mask(IMM8, 7),
708            mask(IMM8, 8),
709            mask(IMM8, 9),
710            mask(IMM8, 10),
711            mask(IMM8, 11),
712            mask(IMM8, 12),
713            mask(IMM8, 13),
714            mask(IMM8, 14),
715            mask(IMM8, 15),
716        ],
717    );
718    transmute(x)
719}
720
721#[inline]
726#[target_feature(enable = "sse2")]
727#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
728#[rustc_legacy_const_generics(1)]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
731    static_assert_uimm_bits!(IMM8, 8);
732    unsafe {
733        if IMM8 >= 16 {
734            _mm_setzero_si128()
735        } else {
736            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
737        }
738    }
739}
740
741#[inline]
746#[target_feature(enable = "sse2")]
747#[cfg_attr(test, assert_instr(psrlw))]
748#[stable(feature = "simd_x86", since = "1.27.0")]
749pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
750    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
751}
752
753#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
760#[rustc_legacy_const_generics(1)]
761#[stable(feature = "simd_x86", since = "1.27.0")]
762pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
763    static_assert_uimm_bits!(IMM8, 8);
764    unsafe {
765        if IMM8 >= 32 {
766            _mm_setzero_si128()
767        } else {
768            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
769        }
770    }
771}
772
773#[inline]
778#[target_feature(enable = "sse2")]
779#[cfg_attr(test, assert_instr(psrld))]
780#[stable(feature = "simd_x86", since = "1.27.0")]
781pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
782    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
783}
784
785#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
792#[rustc_legacy_const_generics(1)]
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
795    static_assert_uimm_bits!(IMM8, 8);
796    unsafe {
797        if IMM8 >= 64 {
798            _mm_setzero_si128()
799        } else {
800            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
801        }
802    }
803}
804
805#[inline]
810#[target_feature(enable = "sse2")]
811#[cfg_attr(test, assert_instr(psrlq))]
812#[stable(feature = "simd_x86", since = "1.27.0")]
813pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
814    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
815}
816
/// Bitwise AND of `a` and `b`, computed with `simd_and` directly on the
/// `__m128i` values.
///
/// The test-build instruction assertion expects `andps`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_and(a, b) }
}
828
829#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andnps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
838    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
839}
840
/// Bitwise OR of `a` and `b`, computed with `simd_or` directly on the
/// `__m128i` values.
///
/// The test-build instruction assertion expects `orps`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_or(a, b) }
}
852
/// Bitwise XOR of `a` and `b`, computed with `simd_xor` directly on the
/// `__m128i` values.
///
/// The test-build instruction assertion expects `xorps`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_xor(a, b) }
}
864
865#[inline]
869#[target_feature(enable = "sse2")]
870#[cfg_attr(test, assert_instr(pcmpeqb))]
871#[stable(feature = "simd_x86", since = "1.27.0")]
872pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
873    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
874}
875
876#[inline]
880#[target_feature(enable = "sse2")]
881#[cfg_attr(test, assert_instr(pcmpeqw))]
882#[stable(feature = "simd_x86", since = "1.27.0")]
883pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
884    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
885}
886
887#[inline]
891#[target_feature(enable = "sse2")]
892#[cfg_attr(test, assert_instr(pcmpeqd))]
893#[stable(feature = "simd_x86", since = "1.27.0")]
894pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
895    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
896}
897
898#[inline]
902#[target_feature(enable = "sse2")]
903#[cfg_attr(test, assert_instr(pcmpgtb))]
904#[stable(feature = "simd_x86", since = "1.27.0")]
905pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
906    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
907}
908
909#[inline]
913#[target_feature(enable = "sse2")]
914#[cfg_attr(test, assert_instr(pcmpgtw))]
915#[stable(feature = "simd_x86", since = "1.27.0")]
916pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
917    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
918}
919
920#[inline]
924#[target_feature(enable = "sse2")]
925#[cfg_attr(test, assert_instr(pcmpgtd))]
926#[stable(feature = "simd_x86", since = "1.27.0")]
927pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
928    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
929}
930
931#[inline]
935#[target_feature(enable = "sse2")]
936#[cfg_attr(test, assert_instr(pcmpgtb))]
937#[stable(feature = "simd_x86", since = "1.27.0")]
938pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
939    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
940}
941
942#[inline]
946#[target_feature(enable = "sse2")]
947#[cfg_attr(test, assert_instr(pcmpgtw))]
948#[stable(feature = "simd_x86", since = "1.27.0")]
949pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
950    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtd))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
961    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
962}
963
964#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(cvtdq2pd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
973    unsafe {
974        let a = a.as_i32x4();
975        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
976    }
977}
978
979#[inline]
984#[target_feature(enable = "sse2")]
985#[cfg_attr(test, assert_instr(cvtsi2sd))]
986#[stable(feature = "simd_x86", since = "1.27.0")]
987pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
988    unsafe { simd_insert!(a, 0, b as f64) }
989}
990
991#[inline]
996#[target_feature(enable = "sse2")]
997#[cfg_attr(test, assert_instr(cvtdq2ps))]
998#[stable(feature = "simd_x86", since = "1.27.0")]
999pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1000    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1001}
1002
1003#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtps2dq))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1012    unsafe { transmute(cvtps2dq(a)) }
1013}
1014
1015#[inline]
1020#[target_feature(enable = "sse2")]
1021#[stable(feature = "simd_x86", since = "1.27.0")]
1022pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1023    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1024}
1025
1026#[inline]
1030#[target_feature(enable = "sse2")]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1033    unsafe { simd_extract!(a.as_i32x4(), 0) }
1034}
1035
1036#[inline]
1041#[target_feature(enable = "sse2")]
1042#[stable(feature = "simd_x86", since = "1.27.0")]
1044pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1045    unsafe { transmute(i64x2::new(e0, e1)) }
1046}
1047
1048#[inline]
1052#[target_feature(enable = "sse2")]
1053#[stable(feature = "simd_x86", since = "1.27.0")]
1055pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1056    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1057}
1058
1059#[inline]
1063#[target_feature(enable = "sse2")]
1064#[stable(feature = "simd_x86", since = "1.27.0")]
1066pub fn _mm_set_epi16(
1067    e7: i16,
1068    e6: i16,
1069    e5: i16,
1070    e4: i16,
1071    e3: i16,
1072    e2: i16,
1073    e1: i16,
1074    e0: i16,
1075) -> __m128i {
1076    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1086pub fn _mm_set_epi8(
1087    e15: i8,
1088    e14: i8,
1089    e13: i8,
1090    e12: i8,
1091    e11: i8,
1092    e10: i8,
1093    e9: i8,
1094    e8: i8,
1095    e7: i8,
1096    e6: i8,
1097    e5: i8,
1098    e4: i8,
1099    e3: i8,
1100    e2: i8,
1101    e1: i8,
1102    e0: i8,
1103) -> __m128i {
1104    unsafe {
1105        #[rustfmt::skip]
1106        transmute(i8x16::new(
1107            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1108        ))
1109    }
1110}
1111
/// Builds a vector with `a` in both 64-bit positions by calling
/// `_mm_set_epi64x(a, a)`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}
1122
/// Builds a vector with `a` in all four 32-bit positions by calling
/// `_mm_set_epi32` with `a` for every argument.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}
1133
/// Builds a vector with `a` in all eight 16-bit positions by calling
/// `_mm_set_epi16` with `a` for every argument.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}
1144
/// Builds a vector with `a` in all sixteen 8-bit positions by calling
/// `_mm_set_epi8` with `a` for every argument.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
1155
/// Builds a vector from four 32-bit integers given in reverse order relative
/// to `_mm_set_epi32`.
///
/// The arguments are forwarded to `_mm_set_epi32` back to front, so this
/// function's last argument becomes `_mm_set_epi32`'s first.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}
1166
/// Builds a vector from eight 16-bit integers given in reverse order relative
/// to `_mm_set_epi16`.
///
/// The arguments are forwarded to `_mm_set_epi16` back to front, so this
/// function's last argument becomes `_mm_set_epi16`'s first.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}
1186
/// Builds a vector from sixteen 8-bit integers given in reverse order
/// relative to `_mm_set_epi8`.
///
/// The arguments are forwarded to `_mm_set_epi8` back to front, so this
/// function's last argument becomes `_mm_set_epi8`'s first.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
1217
/// Returns a `__m128i` with every bit cleared.
///
/// The value comes from `mem::zeroed()` evaluated inside a `const` block.
/// The test-build instruction assertion expects `xorps`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_si128() -> __m128i {
    const { unsafe { mem::zeroed() } }
}
1228
1229#[inline]
1233#[target_feature(enable = "sse2")]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1236    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1237}
1238
1239#[inline]
1245#[target_feature(enable = "sse2")]
1246#[cfg_attr(test, assert_instr(movaps))]
1247#[stable(feature = "simd_x86", since = "1.27.0")]
1248pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1249    *mem_addr
1250}
1251
1252#[inline]
1258#[target_feature(enable = "sse2")]
1259#[cfg_attr(test, assert_instr(movups))]
1260#[stable(feature = "simd_x86", since = "1.27.0")]
1261pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1262    let mut dst: __m128i = _mm_undefined_si128();
1263    ptr::copy_nonoverlapping(
1264        mem_addr as *const u8,
1265        ptr::addr_of_mut!(dst) as *mut u8,
1266        mem::size_of::<__m128i>(),
1267    );
1268    dst
1269}
1270
/// Passes the `i8x16` views of `a` and `mask`, together with `mem_addr`, to
/// the `maskmovdqu` binding.
///
/// # Safety
///
/// NOTE(review): the requirements on `mem_addr` and the meaning of `mask` are
/// defined by the `maskmovdqu` binding, which is declared outside this view —
/// confirm before relying on them.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
1288
1289#[inline]
1295#[target_feature(enable = "sse2")]
1296#[cfg_attr(test, assert_instr(movaps))]
1297#[stable(feature = "simd_x86", since = "1.27.0")]
1298pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1299    *mem_addr = a;
1300}
1301
1302#[inline]
1308#[target_feature(enable = "sse2")]
1309#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1311pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1312    mem_addr.write_unaligned(a);
1313}
1314
1315#[inline]
1321#[target_feature(enable = "sse2")]
1322#[stable(feature = "simd_x86", since = "1.27.0")]
1323pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1324    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1325}
1326
/// Stores `a` to `mem_addr` by emitting a `movntdq` instruction through
/// inline assembly.
///
/// `mem_addr` is supplied as the general-purpose register operand `p` and `a`
/// as an XMM register operand. The assembly is declared `nostack` and
/// `preserves_flags`.
/// NOTE(review): the `vps!` macro that builds the instruction template (and
/// presumably the `{p}` memory operand) is defined outside this view.
///
/// # Safety
///
/// NOTE(review): the function is `unsafe` and writes through the raw pointer
/// `mem_addr`; alignment and memory-ordering requirements are not visible
/// here — confirm against the instruction reference.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    crate::arch::asm!(
        vps!("movntdq",  ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
1353
/// Stores the 32-bit integer `a` to `mem_addr` by emitting a `movnti`
/// instruction through inline assembly.
///
/// `mem_addr` is supplied as the general-purpose register operand `p`, and
/// `a` as a general-purpose register operand referenced with the `:e`
/// modifier. The assembly is declared `nostack` and `preserves_flags`.
/// NOTE(review): the `vps!` macro that builds the instruction template (and
/// presumably the `{p}` memory operand) is defined outside this view.
///
/// # Safety
///
/// NOTE(review): the function is `unsafe` and writes through the raw pointer
/// `mem_addr`; memory-ordering requirements are not visible here — confirm
/// against the instruction reference.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    crate::arch::asm!(
        vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
        a = in(reg) a,
        options(nostack, preserves_flags),
    );
}
1380
1381#[inline]
1386#[target_feature(enable = "sse2")]
1387#[cfg_attr(
1389    all(test, not(target_env = "msvc"), target_arch = "x86_64"),
1390    assert_instr(movq)
1391)]
1392#[stable(feature = "simd_x86", since = "1.27.0")]
1393pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1394    unsafe {
1395        let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1396        transmute(r)
1397    }
1398}
1399
1400#[inline]
1405#[target_feature(enable = "sse2")]
1406#[cfg_attr(test, assert_instr(packsswb))]
1407#[stable(feature = "simd_x86", since = "1.27.0")]
1408pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1409    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1410}
1411
1412#[inline]
1417#[target_feature(enable = "sse2")]
1418#[cfg_attr(test, assert_instr(packssdw))]
1419#[stable(feature = "simd_x86", since = "1.27.0")]
1420pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1421    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1422}
1423
1424#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(packuswb))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1433    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1434}
1435
1436#[inline]
1440#[target_feature(enable = "sse2")]
1441#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1442#[rustc_legacy_const_generics(1)]
1443#[stable(feature = "simd_x86", since = "1.27.0")]
1444pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1445    static_assert_uimm_bits!(IMM8, 3);
1446    unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1447}
1448
1449#[inline]
1453#[target_feature(enable = "sse2")]
1454#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1455#[rustc_legacy_const_generics(2)]
1456#[stable(feature = "simd_x86", since = "1.27.0")]
1457pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1458    static_assert_uimm_bits!(IMM8, 3);
1459    unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1460}
1461
1462#[inline]
1466#[target_feature(enable = "sse2")]
1467#[cfg_attr(test, assert_instr(pmovmskb))]
1468#[stable(feature = "simd_x86", since = "1.27.0")]
1469pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1470    unsafe {
1471        let z = i8x16::ZERO;
1472        let m: i8x16 = simd_lt(a.as_i8x16(), z);
1473        simd_bitmask::<_, u16>(m) as u32 as i32
1474    }
1475}
1476
1477#[inline]
1481#[target_feature(enable = "sse2")]
1482#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1483#[rustc_legacy_const_generics(1)]
1484#[stable(feature = "simd_x86", since = "1.27.0")]
1485pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1486    static_assert_uimm_bits!(IMM8, 8);
1487    unsafe {
1488        let a = a.as_i32x4();
1489        let x: i32x4 = simd_shuffle!(
1490            a,
1491            a,
1492            [
1493                IMM8 as u32 & 0b11,
1494                (IMM8 as u32 >> 2) & 0b11,
1495                (IMM8 as u32 >> 4) & 0b11,
1496                (IMM8 as u32 >> 6) & 0b11,
1497            ],
1498        );
1499        transmute(x)
1500    }
1501}
1502
1503#[inline]
1511#[target_feature(enable = "sse2")]
1512#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1513#[rustc_legacy_const_generics(1)]
1514#[stable(feature = "simd_x86", since = "1.27.0")]
1515pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1516    static_assert_uimm_bits!(IMM8, 8);
1517    unsafe {
1518        let a = a.as_i16x8();
1519        let x: i16x8 = simd_shuffle!(
1520            a,
1521            a,
1522            [
1523                0,
1524                1,
1525                2,
1526                3,
1527                (IMM8 as u32 & 0b11) + 4,
1528                ((IMM8 as u32 >> 2) & 0b11) + 4,
1529                ((IMM8 as u32 >> 4) & 0b11) + 4,
1530                ((IMM8 as u32 >> 6) & 0b11) + 4,
1531            ],
1532        );
1533        transmute(x)
1534    }
1535}
1536
1537#[inline]
1545#[target_feature(enable = "sse2")]
1546#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1547#[rustc_legacy_const_generics(1)]
1548#[stable(feature = "simd_x86", since = "1.27.0")]
1549pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1550    static_assert_uimm_bits!(IMM8, 8);
1551    unsafe {
1552        let a = a.as_i16x8();
1553        let x: i16x8 = simd_shuffle!(
1554            a,
1555            a,
1556            [
1557                IMM8 as u32 & 0b11,
1558                (IMM8 as u32 >> 2) & 0b11,
1559                (IMM8 as u32 >> 4) & 0b11,
1560                (IMM8 as u32 >> 6) & 0b11,
1561                4,
1562                5,
1563                6,
1564                7,
1565            ],
1566        );
1567        transmute(x)
1568    }
1569}
1570
1571#[inline]
1575#[target_feature(enable = "sse2")]
1576#[cfg_attr(test, assert_instr(punpckhbw))]
1577#[stable(feature = "simd_x86", since = "1.27.0")]
1578pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1579    unsafe {
1580        transmute::<i8x16, _>(simd_shuffle!(
1581            a.as_i8x16(),
1582            b.as_i8x16(),
1583            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1584        ))
1585    }
1586}
1587
1588#[inline]
1592#[target_feature(enable = "sse2")]
1593#[cfg_attr(test, assert_instr(punpckhwd))]
1594#[stable(feature = "simd_x86", since = "1.27.0")]
1595pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1596    unsafe {
1597        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1598        transmute::<i16x8, _>(x)
1599    }
1600}
1601
1602#[inline]
1606#[target_feature(enable = "sse2")]
1607#[cfg_attr(test, assert_instr(unpckhps))]
1608#[stable(feature = "simd_x86", since = "1.27.0")]
1609pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1610    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1611}
1612
1613#[inline]
1617#[target_feature(enable = "sse2")]
1618#[cfg_attr(test, assert_instr(unpckhpd))]
1619#[stable(feature = "simd_x86", since = "1.27.0")]
1620pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1621    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1622}
1623
1624#[inline]
1628#[target_feature(enable = "sse2")]
1629#[cfg_attr(test, assert_instr(punpcklbw))]
1630#[stable(feature = "simd_x86", since = "1.27.0")]
1631pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1632    unsafe {
1633        transmute::<i8x16, _>(simd_shuffle!(
1634            a.as_i8x16(),
1635            b.as_i8x16(),
1636            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1637        ))
1638    }
1639}
1640
1641#[inline]
1645#[target_feature(enable = "sse2")]
1646#[cfg_attr(test, assert_instr(punpcklwd))]
1647#[stable(feature = "simd_x86", since = "1.27.0")]
1648pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1649    unsafe {
1650        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1651        transmute::<i16x8, _>(x)
1652    }
1653}
1654
1655#[inline]
1659#[target_feature(enable = "sse2")]
1660#[cfg_attr(test, assert_instr(unpcklps))]
1661#[stable(feature = "simd_x86", since = "1.27.0")]
1662pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1663    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1664}
1665
1666#[inline]
1670#[target_feature(enable = "sse2")]
1671#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
1672#[stable(feature = "simd_x86", since = "1.27.0")]
1673pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1674    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1675}
1676
1677#[inline]
1682#[target_feature(enable = "sse2")]
1683#[cfg_attr(test, assert_instr(addsd))]
1684#[stable(feature = "simd_x86", since = "1.27.0")]
1685pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1686    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1687}
1688
1689#[inline]
1694#[target_feature(enable = "sse2")]
1695#[cfg_attr(test, assert_instr(addpd))]
1696#[stable(feature = "simd_x86", since = "1.27.0")]
1697pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1698    unsafe { simd_add(a, b) }
1699}
1700
1701#[inline]
1706#[target_feature(enable = "sse2")]
1707#[cfg_attr(test, assert_instr(divsd))]
1708#[stable(feature = "simd_x86", since = "1.27.0")]
1709pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1710    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1711}
1712
1713#[inline]
1718#[target_feature(enable = "sse2")]
1719#[cfg_attr(test, assert_instr(divpd))]
1720#[stable(feature = "simd_x86", since = "1.27.0")]
1721pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1722    unsafe { simd_div(a, b) }
1723}
1724
1725#[inline]
1730#[target_feature(enable = "sse2")]
1731#[cfg_attr(test, assert_instr(maxsd))]
1732#[stable(feature = "simd_x86", since = "1.27.0")]
1733pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1734    unsafe { maxsd(a, b) }
1735}
1736
1737#[inline]
1742#[target_feature(enable = "sse2")]
1743#[cfg_attr(test, assert_instr(maxpd))]
1744#[stable(feature = "simd_x86", since = "1.27.0")]
1745pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1746    unsafe { maxpd(a, b) }
1747}
1748
1749#[inline]
1754#[target_feature(enable = "sse2")]
1755#[cfg_attr(test, assert_instr(minsd))]
1756#[stable(feature = "simd_x86", since = "1.27.0")]
1757pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1758    unsafe { minsd(a, b) }
1759}
1760
1761#[inline]
1766#[target_feature(enable = "sse2")]
1767#[cfg_attr(test, assert_instr(minpd))]
1768#[stable(feature = "simd_x86", since = "1.27.0")]
1769pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1770    unsafe { minpd(a, b) }
1771}
1772
1773#[inline]
1778#[target_feature(enable = "sse2")]
1779#[cfg_attr(test, assert_instr(mulsd))]
1780#[stable(feature = "simd_x86", since = "1.27.0")]
1781pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1782    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1783}
1784
1785#[inline]
1790#[target_feature(enable = "sse2")]
1791#[cfg_attr(test, assert_instr(mulpd))]
1792#[stable(feature = "simd_x86", since = "1.27.0")]
1793pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1794    unsafe { simd_mul(a, b) }
1795}
1796
1797#[inline]
1802#[target_feature(enable = "sse2")]
1803#[cfg_attr(test, assert_instr(sqrtsd))]
1804#[stable(feature = "simd_x86", since = "1.27.0")]
1805pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1806    unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1807}
1808
1809#[inline]
1813#[target_feature(enable = "sse2")]
1814#[cfg_attr(test, assert_instr(sqrtpd))]
1815#[stable(feature = "simd_x86", since = "1.27.0")]
1816pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1817    unsafe { simd_fsqrt(a) }
1818}
1819
1820#[inline]
1825#[target_feature(enable = "sse2")]
1826#[cfg_attr(test, assert_instr(subsd))]
1827#[stable(feature = "simd_x86", since = "1.27.0")]
1828pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1829    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1830}
1831
1832#[inline]
1837#[target_feature(enable = "sse2")]
1838#[cfg_attr(test, assert_instr(subpd))]
1839#[stable(feature = "simd_x86", since = "1.27.0")]
1840pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1841    unsafe { simd_sub(a, b) }
1842}
1843
1844#[inline]
1849#[target_feature(enable = "sse2")]
1850#[cfg_attr(test, assert_instr(andps))]
1851#[stable(feature = "simd_x86", since = "1.27.0")]
1852pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1853    unsafe {
1854        let a: __m128i = transmute(a);
1855        let b: __m128i = transmute(b);
1856        transmute(_mm_and_si128(a, b))
1857    }
1858}
1859
1860#[inline]
1864#[target_feature(enable = "sse2")]
1865#[cfg_attr(test, assert_instr(andnps))]
1866#[stable(feature = "simd_x86", since = "1.27.0")]
1867pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1868    unsafe {
1869        let a: __m128i = transmute(a);
1870        let b: __m128i = transmute(b);
1871        transmute(_mm_andnot_si128(a, b))
1872    }
1873}
1874
1875#[inline]
1879#[target_feature(enable = "sse2")]
1880#[cfg_attr(test, assert_instr(orps))]
1881#[stable(feature = "simd_x86", since = "1.27.0")]
1882pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1883    unsafe {
1884        let a: __m128i = transmute(a);
1885        let b: __m128i = transmute(b);
1886        transmute(_mm_or_si128(a, b))
1887    }
1888}
1889
1890#[inline]
1894#[target_feature(enable = "sse2")]
1895#[cfg_attr(test, assert_instr(xorps))]
1896#[stable(feature = "simd_x86", since = "1.27.0")]
1897pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1898    unsafe {
1899        let a: __m128i = transmute(a);
1900        let b: __m128i = transmute(b);
1901        transmute(_mm_xor_si128(a, b))
1902    }
1903}
1904
1905#[inline]
1910#[target_feature(enable = "sse2")]
1911#[cfg_attr(test, assert_instr(cmpeqsd))]
1912#[stable(feature = "simd_x86", since = "1.27.0")]
1913pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1914    unsafe { cmpsd(a, b, 0) }
1915}
1916
1917#[inline]
1922#[target_feature(enable = "sse2")]
1923#[cfg_attr(test, assert_instr(cmpltsd))]
1924#[stable(feature = "simd_x86", since = "1.27.0")]
1925pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1926    unsafe { cmpsd(a, b, 1) }
1927}
1928
1929#[inline]
1934#[target_feature(enable = "sse2")]
1935#[cfg_attr(test, assert_instr(cmplesd))]
1936#[stable(feature = "simd_x86", since = "1.27.0")]
1937pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1938    unsafe { cmpsd(a, b, 2) }
1939}
1940
1941#[inline]
1946#[target_feature(enable = "sse2")]
1947#[cfg_attr(test, assert_instr(cmpltsd))]
1948#[stable(feature = "simd_x86", since = "1.27.0")]
1949pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1950    unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1951}
1952
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(cmplesd))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1962    unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1963}
1964
1965#[inline]
1972#[target_feature(enable = "sse2")]
1973#[cfg_attr(test, assert_instr(cmpordsd))]
1974#[stable(feature = "simd_x86", since = "1.27.0")]
1975pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
1976    unsafe { cmpsd(a, b, 7) }
1977}
1978
1979#[inline]
1985#[target_feature(enable = "sse2")]
1986#[cfg_attr(test, assert_instr(cmpunordsd))]
1987#[stable(feature = "simd_x86", since = "1.27.0")]
1988pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
1989    unsafe { cmpsd(a, b, 3) }
1990}
1991
1992#[inline]
1997#[target_feature(enable = "sse2")]
1998#[cfg_attr(test, assert_instr(cmpneqsd))]
1999#[stable(feature = "simd_x86", since = "1.27.0")]
2000pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2001    unsafe { cmpsd(a, b, 4) }
2002}
2003
2004#[inline]
2009#[target_feature(enable = "sse2")]
2010#[cfg_attr(test, assert_instr(cmpnltsd))]
2011#[stable(feature = "simd_x86", since = "1.27.0")]
2012pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2013    unsafe { cmpsd(a, b, 5) }
2014}
2015
2016#[inline]
2021#[target_feature(enable = "sse2")]
2022#[cfg_attr(test, assert_instr(cmpnlesd))]
2023#[stable(feature = "simd_x86", since = "1.27.0")]
2024pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2025    unsafe { cmpsd(a, b, 6) }
2026}
2027
2028#[inline]
2033#[target_feature(enable = "sse2")]
2034#[cfg_attr(test, assert_instr(cmpnltsd))]
2035#[stable(feature = "simd_x86", since = "1.27.0")]
2036pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2037    unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2038}
2039
2040#[inline]
2045#[target_feature(enable = "sse2")]
2046#[cfg_attr(test, assert_instr(cmpnlesd))]
2047#[stable(feature = "simd_x86", since = "1.27.0")]
2048pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2049    unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2050}
2051
2052#[inline]
2056#[target_feature(enable = "sse2")]
2057#[cfg_attr(test, assert_instr(cmpeqpd))]
2058#[stable(feature = "simd_x86", since = "1.27.0")]
2059pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2060    unsafe { cmppd(a, b, 0) }
2061}
2062
2063#[inline]
2067#[target_feature(enable = "sse2")]
2068#[cfg_attr(test, assert_instr(cmpltpd))]
2069#[stable(feature = "simd_x86", since = "1.27.0")]
2070pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2071    unsafe { cmppd(a, b, 1) }
2072}
2073
2074#[inline]
2078#[target_feature(enable = "sse2")]
2079#[cfg_attr(test, assert_instr(cmplepd))]
2080#[stable(feature = "simd_x86", since = "1.27.0")]
2081pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2082    unsafe { cmppd(a, b, 2) }
2083}
2084
2085#[inline]
2089#[target_feature(enable = "sse2")]
2090#[cfg_attr(test, assert_instr(cmpltpd))]
2091#[stable(feature = "simd_x86", since = "1.27.0")]
2092pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2093    _mm_cmplt_pd(b, a)
2094}
2095
2096#[inline]
2100#[target_feature(enable = "sse2")]
2101#[cfg_attr(test, assert_instr(cmplepd))]
2102#[stable(feature = "simd_x86", since = "1.27.0")]
2103pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2104    _mm_cmple_pd(b, a)
2105}
2106
2107#[inline]
2111#[target_feature(enable = "sse2")]
2112#[cfg_attr(test, assert_instr(cmpordpd))]
2113#[stable(feature = "simd_x86", since = "1.27.0")]
2114pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2115    unsafe { cmppd(a, b, 7) }
2116}
2117
2118#[inline]
2122#[target_feature(enable = "sse2")]
2123#[cfg_attr(test, assert_instr(cmpunordpd))]
2124#[stable(feature = "simd_x86", since = "1.27.0")]
2125pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2126    unsafe { cmppd(a, b, 3) }
2127}
2128
2129#[inline]
2133#[target_feature(enable = "sse2")]
2134#[cfg_attr(test, assert_instr(cmpneqpd))]
2135#[stable(feature = "simd_x86", since = "1.27.0")]
2136pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2137    unsafe { cmppd(a, b, 4) }
2138}
2139
2140#[inline]
2144#[target_feature(enable = "sse2")]
2145#[cfg_attr(test, assert_instr(cmpnltpd))]
2146#[stable(feature = "simd_x86", since = "1.27.0")]
2147pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2148    unsafe { cmppd(a, b, 5) }
2149}
2150
2151#[inline]
2155#[target_feature(enable = "sse2")]
2156#[cfg_attr(test, assert_instr(cmpnlepd))]
2157#[stable(feature = "simd_x86", since = "1.27.0")]
2158pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2159    unsafe { cmppd(a, b, 6) }
2160}
2161
2162#[inline]
2166#[target_feature(enable = "sse2")]
2167#[cfg_attr(test, assert_instr(cmpnltpd))]
2168#[stable(feature = "simd_x86", since = "1.27.0")]
2169pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2170    _mm_cmpnlt_pd(b, a)
2171}
2172
2173#[inline]
2178#[target_feature(enable = "sse2")]
2179#[cfg_attr(test, assert_instr(cmpnlepd))]
2180#[stable(feature = "simd_x86", since = "1.27.0")]
2181pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2182    _mm_cmpnle_pd(b, a)
2183}
2184
2185#[inline]
2189#[target_feature(enable = "sse2")]
2190#[cfg_attr(test, assert_instr(comisd))]
2191#[stable(feature = "simd_x86", since = "1.27.0")]
2192pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2193    unsafe { comieqsd(a, b) }
2194}
2195
2196#[inline]
2200#[target_feature(enable = "sse2")]
2201#[cfg_attr(test, assert_instr(comisd))]
2202#[stable(feature = "simd_x86", since = "1.27.0")]
2203pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2204    unsafe { comiltsd(a, b) }
2205}
2206
2207#[inline]
2211#[target_feature(enable = "sse2")]
2212#[cfg_attr(test, assert_instr(comisd))]
2213#[stable(feature = "simd_x86", since = "1.27.0")]
2214pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2215    unsafe { comilesd(a, b) }
2216}
2217
2218#[inline]
2222#[target_feature(enable = "sse2")]
2223#[cfg_attr(test, assert_instr(comisd))]
2224#[stable(feature = "simd_x86", since = "1.27.0")]
2225pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2226    unsafe { comigtsd(a, b) }
2227}
2228
2229#[inline]
2233#[target_feature(enable = "sse2")]
2234#[cfg_attr(test, assert_instr(comisd))]
2235#[stable(feature = "simd_x86", since = "1.27.0")]
2236pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2237    unsafe { comigesd(a, b) }
2238}
2239
2240#[inline]
2244#[target_feature(enable = "sse2")]
2245#[cfg_attr(test, assert_instr(comisd))]
2246#[stable(feature = "simd_x86", since = "1.27.0")]
2247pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2248    unsafe { comineqsd(a, b) }
2249}
2250
2251#[inline]
2255#[target_feature(enable = "sse2")]
2256#[cfg_attr(test, assert_instr(ucomisd))]
2257#[stable(feature = "simd_x86", since = "1.27.0")]
2258pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2259    unsafe { ucomieqsd(a, b) }
2260}
2261
2262#[inline]
2266#[target_feature(enable = "sse2")]
2267#[cfg_attr(test, assert_instr(ucomisd))]
2268#[stable(feature = "simd_x86", since = "1.27.0")]
2269pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2270    unsafe { ucomiltsd(a, b) }
2271}
2272
2273#[inline]
2277#[target_feature(enable = "sse2")]
2278#[cfg_attr(test, assert_instr(ucomisd))]
2279#[stable(feature = "simd_x86", since = "1.27.0")]
2280pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2281    unsafe { ucomilesd(a, b) }
2282}
2283
2284#[inline]
2288#[target_feature(enable = "sse2")]
2289#[cfg_attr(test, assert_instr(ucomisd))]
2290#[stable(feature = "simd_x86", since = "1.27.0")]
2291pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2292    unsafe { ucomigtsd(a, b) }
2293}
2294
2295#[inline]
2299#[target_feature(enable = "sse2")]
2300#[cfg_attr(test, assert_instr(ucomisd))]
2301#[stable(feature = "simd_x86", since = "1.27.0")]
2302pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2303    unsafe { ucomigesd(a, b) }
2304}
2305
2306#[inline]
2310#[target_feature(enable = "sse2")]
2311#[cfg_attr(test, assert_instr(ucomisd))]
2312#[stable(feature = "simd_x86", since = "1.27.0")]
2313pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2314    unsafe { ucomineqsd(a, b) }
2315}
2316
2317#[inline]
2322#[target_feature(enable = "sse2")]
2323#[cfg_attr(test, assert_instr(cvtpd2ps))]
2324#[stable(feature = "simd_x86", since = "1.27.0")]
2325pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2326    unsafe {
2327        let r = simd_cast::<_, f32x2>(a.as_f64x2());
2328        let zero = f32x2::ZERO;
2329        transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2330    }
2331}
2332
2333#[inline]
2339#[target_feature(enable = "sse2")]
2340#[cfg_attr(test, assert_instr(cvtps2pd))]
2341#[stable(feature = "simd_x86", since = "1.27.0")]
2342pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2343    unsafe {
2344        let a = a.as_f32x4();
2345        transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2346    }
2347}
2348
2349#[inline]
2354#[target_feature(enable = "sse2")]
2355#[cfg_attr(test, assert_instr(cvtpd2dq))]
2356#[stable(feature = "simd_x86", since = "1.27.0")]
2357pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2358    unsafe { transmute(cvtpd2dq(a)) }
2359}
2360
2361#[inline]
2366#[target_feature(enable = "sse2")]
2367#[cfg_attr(test, assert_instr(cvtsd2si))]
2368#[stable(feature = "simd_x86", since = "1.27.0")]
2369pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2370    unsafe { cvtsd2si(a) }
2371}
2372
2373#[inline]
2380#[target_feature(enable = "sse2")]
2381#[cfg_attr(test, assert_instr(cvtsd2ss))]
2382#[stable(feature = "simd_x86", since = "1.27.0")]
2383pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2384    unsafe { cvtsd2ss(a, b) }
2385}
2386
2387#[inline]
2391#[target_feature(enable = "sse2")]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2394    unsafe { simd_extract!(a, 0) }
2395}
2396
2397#[inline]
2404#[target_feature(enable = "sse2")]
2405#[cfg_attr(test, assert_instr(cvtss2sd))]
2406#[stable(feature = "simd_x86", since = "1.27.0")]
2407pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2408    unsafe { cvtss2sd(a, b) }
2409}
2410
2411#[inline]
2416#[target_feature(enable = "sse2")]
2417#[cfg_attr(test, assert_instr(cvttpd2dq))]
2418#[stable(feature = "simd_x86", since = "1.27.0")]
2419pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2420    unsafe { transmute(cvttpd2dq(a)) }
2421}
2422
2423#[inline]
2428#[target_feature(enable = "sse2")]
2429#[cfg_attr(test, assert_instr(cvttsd2si))]
2430#[stable(feature = "simd_x86", since = "1.27.0")]
2431pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2432    unsafe { cvttsd2si(a) }
2433}
2434
2435#[inline]
2440#[target_feature(enable = "sse2")]
2441#[cfg_attr(test, assert_instr(cvttps2dq))]
2442#[stable(feature = "simd_x86", since = "1.27.0")]
2443pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2444    unsafe { transmute(cvttps2dq(a)) }
2445}
2446
2447#[inline]
2452#[target_feature(enable = "sse2")]
2453#[stable(feature = "simd_x86", since = "1.27.0")]
2454pub fn _mm_set_sd(a: f64) -> __m128d {
2455    _mm_set_pd(0.0, a)
2456}
2457
2458#[inline]
2463#[target_feature(enable = "sse2")]
2464#[stable(feature = "simd_x86", since = "1.27.0")]
2465pub fn _mm_set1_pd(a: f64) -> __m128d {
2466    _mm_set_pd(a, a)
2467}
2468
2469#[inline]
2474#[target_feature(enable = "sse2")]
2475#[stable(feature = "simd_x86", since = "1.27.0")]
2476pub fn _mm_set_pd1(a: f64) -> __m128d {
2477    _mm_set_pd(a, a)
2478}
2479
2480#[inline]
2485#[target_feature(enable = "sse2")]
2486#[stable(feature = "simd_x86", since = "1.27.0")]
2487pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2488    __m128d([b, a])
2489}
2490
2491#[inline]
2496#[target_feature(enable = "sse2")]
2497#[stable(feature = "simd_x86", since = "1.27.0")]
2498pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2499    _mm_set_pd(b, a)
2500}
2501
2502#[inline]
2507#[target_feature(enable = "sse2")]
2508#[cfg_attr(test, assert_instr(xorp))]
2509#[stable(feature = "simd_x86", since = "1.27.0")]
2510pub fn _mm_setzero_pd() -> __m128d {
2511    const { unsafe { mem::zeroed() } }
2512}
2513
2514#[inline]
2521#[target_feature(enable = "sse2")]
2522#[cfg_attr(test, assert_instr(movmskpd))]
2523#[stable(feature = "simd_x86", since = "1.27.0")]
2524pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2525    unsafe {
2528        let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2529        simd_bitmask::<i64x2, u8>(mask).into()
2530    }
2531}
2532
2533#[inline]
2540#[target_feature(enable = "sse2")]
2541#[cfg_attr(test, assert_instr(movaps))]
2542#[stable(feature = "simd_x86", since = "1.27.0")]
2543#[allow(clippy::cast_ptr_alignment)]
2544pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2545    *(mem_addr as *const __m128d)
2546}
2547
2548#[inline]
2553#[target_feature(enable = "sse2")]
2554#[cfg_attr(test, assert_instr(movsd))]
2555#[stable(feature = "simd_x86", since = "1.27.0")]
2556pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2557    _mm_setr_pd(*mem_addr, 0.)
2558}
2559
2560#[inline]
2566#[target_feature(enable = "sse2")]
2567#[cfg_attr(test, assert_instr(movhps))]
2568#[stable(feature = "simd_x86", since = "1.27.0")]
2569pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2570    _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2571}
2572
2573#[inline]
2579#[target_feature(enable = "sse2")]
2580#[cfg_attr(test, assert_instr(movlps))]
2581#[stable(feature = "simd_x86", since = "1.27.0")]
2582pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2583    _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2584}
2585
2586#[inline]
2602#[target_feature(enable = "sse2")]
2603#[cfg_attr(test, assert_instr(movntpd))]
2604#[stable(feature = "simd_x86", since = "1.27.0")]
2605#[allow(clippy::cast_ptr_alignment)]
2606pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
2607    crate::arch::asm!(
2608        vps!("movntpd", ",{a}"),
2609        p = in(reg) mem_addr,
2610        a = in(xmm_reg) a,
2611        options(nostack, preserves_flags),
2612    );
2613}
2614
2615#[inline]
2620#[target_feature(enable = "sse2")]
2621#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2622#[stable(feature = "simd_x86", since = "1.27.0")]
2623pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2624    *mem_addr = simd_extract!(a, 0)
2625}
2626
2627#[inline]
2633#[target_feature(enable = "sse2")]
2634#[cfg_attr(test, assert_instr(movaps))]
2635#[stable(feature = "simd_x86", since = "1.27.0")]
2636#[allow(clippy::cast_ptr_alignment)]
2637pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2638    *(mem_addr as *mut __m128d) = a;
2639}
2640
2641#[inline]
2647#[target_feature(enable = "sse2")]
2648#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2650pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2651    mem_addr.cast::<__m128d>().write_unaligned(a);
2652}
2653
2654#[inline]
2660#[target_feature(enable = "sse2")]
2661#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2662pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2663    ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2664}
2665
2666#[inline]
2672#[target_feature(enable = "sse2")]
2673#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2674pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2675    ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2676}
2677
2678#[inline]
2684#[target_feature(enable = "sse2")]
2685#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2686pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2687    ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2688}
2689
2690#[inline]
2696#[target_feature(enable = "sse2")]
2697#[stable(feature = "simd_x86", since = "1.27.0")]
2698#[allow(clippy::cast_ptr_alignment)]
2699pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2700    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2701    *(mem_addr as *mut __m128d) = b;
2702}
2703
2704#[inline]
2710#[target_feature(enable = "sse2")]
2711#[stable(feature = "simd_x86", since = "1.27.0")]
2712#[allow(clippy::cast_ptr_alignment)]
2713pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2714    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2715    *(mem_addr as *mut __m128d) = b;
2716}
2717
2718#[inline]
2725#[target_feature(enable = "sse2")]
2726#[stable(feature = "simd_x86", since = "1.27.0")]
2727#[allow(clippy::cast_ptr_alignment)]
2728pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2729    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2730    *(mem_addr as *mut __m128d) = b;
2731}
2732
2733#[inline]
2738#[target_feature(enable = "sse2")]
2739#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhps))]
2740#[stable(feature = "simd_x86", since = "1.27.0")]
2741pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2742    *mem_addr = simd_extract!(a, 1);
2743}
2744
2745#[inline]
2750#[target_feature(enable = "sse2")]
2751#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2752#[stable(feature = "simd_x86", since = "1.27.0")]
2753pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2754    *mem_addr = simd_extract!(a, 0);
2755}
2756
2757#[inline]
2762#[target_feature(enable = "sse2")]
2763#[stable(feature = "simd_x86", since = "1.27.0")]
2765pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2766    let d = *mem_addr;
2767    _mm_setr_pd(d, d)
2768}
2769
2770#[inline]
2775#[target_feature(enable = "sse2")]
2776#[stable(feature = "simd_x86", since = "1.27.0")]
2778pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2779    _mm_load1_pd(mem_addr)
2780}
2781
2782#[inline]
2788#[target_feature(enable = "sse2")]
2789#[cfg_attr(test, assert_instr(movaps))]
2790#[stable(feature = "simd_x86", since = "1.27.0")]
2791pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2792    let a = _mm_load_pd(mem_addr);
2793    simd_shuffle!(a, a, [1, 0])
2794}
2795
2796#[inline]
2802#[target_feature(enable = "sse2")]
2803#[cfg_attr(test, assert_instr(movups))]
2804#[stable(feature = "simd_x86", since = "1.27.0")]
2805pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2806    let mut dst = _mm_undefined_pd();
2807    ptr::copy_nonoverlapping(
2808        mem_addr as *const u8,
2809        ptr::addr_of_mut!(dst) as *mut u8,
2810        mem::size_of::<__m128d>(),
2811    );
2812    dst
2813}
2814
2815#[inline]
2821#[target_feature(enable = "sse2")]
2822#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2823pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2824    transmute(i16x8::new(
2825        ptr::read_unaligned(mem_addr as *const i16),
2826        0,
2827        0,
2828        0,
2829        0,
2830        0,
2831        0,
2832        0,
2833    ))
2834}
2835
2836#[inline]
2842#[target_feature(enable = "sse2")]
2843#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2844pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2845    transmute(i32x4::new(
2846        ptr::read_unaligned(mem_addr as *const i32),
2847        0,
2848        0,
2849        0,
2850    ))
2851}
2852
2853#[inline]
2859#[target_feature(enable = "sse2")]
2860#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2861pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2862    transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2863}
2864
2865#[inline]
2871#[target_feature(enable = "sse2")]
2872#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
2873#[rustc_legacy_const_generics(2)]
2874#[stable(feature = "simd_x86", since = "1.27.0")]
2875pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
2876    static_assert_uimm_bits!(MASK, 8);
2877    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
2878}
2879
2880#[inline]
2886#[target_feature(enable = "sse2")]
2887#[cfg_attr(test, assert_instr(movsd))]
2888#[stable(feature = "simd_x86", since = "1.27.0")]
2889pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2890    unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2891}
2892
2893#[inline]
2898#[target_feature(enable = "sse2")]
2899#[stable(feature = "simd_x86", since = "1.27.0")]
2900pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2901    unsafe { transmute(a) }
2902}
2903
2904#[inline]
2909#[target_feature(enable = "sse2")]
2910#[stable(feature = "simd_x86", since = "1.27.0")]
2911pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2912    unsafe { transmute(a) }
2913}
2914
2915#[inline]
2920#[target_feature(enable = "sse2")]
2921#[stable(feature = "simd_x86", since = "1.27.0")]
2922pub fn _mm_castps_pd(a: __m128) -> __m128d {
2923    unsafe { transmute(a) }
2924}
2925
2926#[inline]
2931#[target_feature(enable = "sse2")]
2932#[stable(feature = "simd_x86", since = "1.27.0")]
2933pub fn _mm_castps_si128(a: __m128) -> __m128i {
2934    unsafe { transmute(a) }
2935}
2936
2937#[inline]
2942#[target_feature(enable = "sse2")]
2943#[stable(feature = "simd_x86", since = "1.27.0")]
2944pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2945    unsafe { transmute(a) }
2946}
2947
2948#[inline]
2953#[target_feature(enable = "sse2")]
2954#[stable(feature = "simd_x86", since = "1.27.0")]
2955pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2956    unsafe { transmute(a) }
2957}
2958
2959#[inline]
2965#[target_feature(enable = "sse2")]
2966#[stable(feature = "simd_x86", since = "1.27.0")]
2967pub fn _mm_undefined_pd() -> __m128d {
2968    const { unsafe { mem::zeroed() } }
2969}
2970
2971#[inline]
2977#[target_feature(enable = "sse2")]
2978#[stable(feature = "simd_x86", since = "1.27.0")]
2979pub fn _mm_undefined_si128() -> __m128i {
2980    const { unsafe { mem::zeroed() } }
2981}
2982
2983#[inline]
2991#[target_feature(enable = "sse2")]
2992#[cfg_attr(test, assert_instr(unpckhpd))]
2993#[stable(feature = "simd_x86", since = "1.27.0")]
2994pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
2995    unsafe { simd_shuffle!(a, b, [1, 3]) }
2996}
2997
2998#[inline]
3006#[target_feature(enable = "sse2")]
3007#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
3008#[stable(feature = "simd_x86", since = "1.27.0")]
3009pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
3010    unsafe { simd_shuffle!(a, b, [0, 2]) }
3011}
3012
3013#[allow(improper_ctypes)]
3014unsafe extern "C" {
3015    #[link_name = "llvm.x86.sse2.pause"]
3016    fn pause();
3017    #[link_name = "llvm.x86.sse2.clflush"]
3018    fn clflush(p: *const u8);
3019    #[link_name = "llvm.x86.sse2.lfence"]
3020    fn lfence();
3021    #[link_name = "llvm.x86.sse2.mfence"]
3022    fn mfence();
3023    #[link_name = "llvm.x86.sse2.pmadd.wd"]
3024    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
3025    #[link_name = "llvm.x86.sse2.psad.bw"]
3026    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
3027    #[link_name = "llvm.x86.sse2.psll.w"]
3028    fn psllw(a: i16x8, count: i16x8) -> i16x8;
3029    #[link_name = "llvm.x86.sse2.psll.d"]
3030    fn pslld(a: i32x4, count: i32x4) -> i32x4;
3031    #[link_name = "llvm.x86.sse2.psll.q"]
3032    fn psllq(a: i64x2, count: i64x2) -> i64x2;
3033    #[link_name = "llvm.x86.sse2.psra.w"]
3034    fn psraw(a: i16x8, count: i16x8) -> i16x8;
3035    #[link_name = "llvm.x86.sse2.psra.d"]
3036    fn psrad(a: i32x4, count: i32x4) -> i32x4;
3037    #[link_name = "llvm.x86.sse2.psrl.w"]
3038    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
3039    #[link_name = "llvm.x86.sse2.psrl.d"]
3040    fn psrld(a: i32x4, count: i32x4) -> i32x4;
3041    #[link_name = "llvm.x86.sse2.psrl.q"]
3042    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
3043    #[link_name = "llvm.x86.sse2.cvtps2dq"]
3044    fn cvtps2dq(a: __m128) -> i32x4;
3045    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3046    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
3047    #[link_name = "llvm.x86.sse2.packsswb.128"]
3048    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
3049    #[link_name = "llvm.x86.sse2.packssdw.128"]
3050    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
3051    #[link_name = "llvm.x86.sse2.packuswb.128"]
3052    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
3053    #[link_name = "llvm.x86.sse2.max.sd"]
3054    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
3055    #[link_name = "llvm.x86.sse2.max.pd"]
3056    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
3057    #[link_name = "llvm.x86.sse2.min.sd"]
3058    fn minsd(a: __m128d, b: __m128d) -> __m128d;
3059    #[link_name = "llvm.x86.sse2.min.pd"]
3060    fn minpd(a: __m128d, b: __m128d) -> __m128d;
3061    #[link_name = "llvm.x86.sse2.cmp.sd"]
3062    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3063    #[link_name = "llvm.x86.sse2.cmp.pd"]
3064    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3065    #[link_name = "llvm.x86.sse2.comieq.sd"]
3066    fn comieqsd(a: __m128d, b: __m128d) -> i32;
3067    #[link_name = "llvm.x86.sse2.comilt.sd"]
3068    fn comiltsd(a: __m128d, b: __m128d) -> i32;
3069    #[link_name = "llvm.x86.sse2.comile.sd"]
3070    fn comilesd(a: __m128d, b: __m128d) -> i32;
3071    #[link_name = "llvm.x86.sse2.comigt.sd"]
3072    fn comigtsd(a: __m128d, b: __m128d) -> i32;
3073    #[link_name = "llvm.x86.sse2.comige.sd"]
3074    fn comigesd(a: __m128d, b: __m128d) -> i32;
3075    #[link_name = "llvm.x86.sse2.comineq.sd"]
3076    fn comineqsd(a: __m128d, b: __m128d) -> i32;
3077    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3078    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3079    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3080    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3081    #[link_name = "llvm.x86.sse2.ucomile.sd"]
3082    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
3083    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3084    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3085    #[link_name = "llvm.x86.sse2.ucomige.sd"]
3086    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
3087    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3088    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3089    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3090    fn cvtpd2dq(a: __m128d) -> i32x4;
3091    #[link_name = "llvm.x86.sse2.cvtsd2si"]
3092    fn cvtsd2si(a: __m128d) -> i32;
3093    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3094    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3095    #[link_name = "llvm.x86.sse2.cvtss2sd"]
3096    fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
3097    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3098    fn cvttpd2dq(a: __m128d) -> i32x4;
3099    #[link_name = "llvm.x86.sse2.cvttsd2si"]
3100    fn cvttsd2si(a: __m128d) -> i32;
3101    #[link_name = "llvm.x86.sse2.cvttps2dq"]
3102    fn cvttps2dq(a: __m128) -> i32x4;
3103}
3104
3105#[cfg(test)]
3106mod tests {
3107    use crate::{
3108        core_arch::{simd::*, x86::*},
3109        hint::black_box,
3110    };
3111    use std::{
3112        boxed, f32, f64,
3113        mem::{self, transmute},
3114        ptr,
3115    };
3116    use stdarch_test::simd_test;
3117
3118    const NAN: f64 = f64::NAN;
3119
3120    #[test]
3121    fn test_mm_pause() {
3122        unsafe { _mm_pause() }
3123    }
3124
3125    #[simd_test(enable = "sse2")]
3126    unsafe fn test_mm_clflush() {
3127        let x = 0_u8;
3128        _mm_clflush(ptr::addr_of!(x));
3129    }
3130
3131    #[simd_test(enable = "sse2")]
3132    #[cfg_attr(miri, ignore)]
3134    unsafe fn test_mm_lfence() {
3135        _mm_lfence();
3136    }
3137
3138    #[simd_test(enable = "sse2")]
3139    #[cfg_attr(miri, ignore)]
3141    unsafe fn test_mm_mfence() {
3142        _mm_mfence();
3143    }
3144
3145    #[simd_test(enable = "sse2")]
3146    unsafe fn test_mm_add_epi8() {
3147        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3148        #[rustfmt::skip]
3149        let b = _mm_setr_epi8(
3150            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3151        );
3152        let r = _mm_add_epi8(a, b);
3153        #[rustfmt::skip]
3154        let e = _mm_setr_epi8(
3155            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3156        );
3157        assert_eq_m128i(r, e);
3158    }
3159
3160    #[simd_test(enable = "sse2")]
3161    unsafe fn test_mm_add_epi8_overflow() {
3162        let a = _mm_set1_epi8(0x7F);
3163        let b = _mm_set1_epi8(1);
3164        let r = _mm_add_epi8(a, b);
3165        assert_eq_m128i(r, _mm_set1_epi8(-128));
3166    }
3167
3168    #[simd_test(enable = "sse2")]
3169    unsafe fn test_mm_add_epi16() {
3170        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3171        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3172        let r = _mm_add_epi16(a, b);
3173        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3174        assert_eq_m128i(r, e);
3175    }
3176
3177    #[simd_test(enable = "sse2")]
3178    unsafe fn test_mm_add_epi32() {
3179        let a = _mm_setr_epi32(0, 1, 2, 3);
3180        let b = _mm_setr_epi32(4, 5, 6, 7);
3181        let r = _mm_add_epi32(a, b);
3182        let e = _mm_setr_epi32(4, 6, 8, 10);
3183        assert_eq_m128i(r, e);
3184    }
3185
3186    #[simd_test(enable = "sse2")]
3187    unsafe fn test_mm_add_epi64() {
3188        let a = _mm_setr_epi64x(0, 1);
3189        let b = _mm_setr_epi64x(2, 3);
3190        let r = _mm_add_epi64(a, b);
3191        let e = _mm_setr_epi64x(2, 4);
3192        assert_eq_m128i(r, e);
3193    }
3194
3195    #[simd_test(enable = "sse2")]
3196    unsafe fn test_mm_adds_epi8() {
3197        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3198        #[rustfmt::skip]
3199        let b = _mm_setr_epi8(
3200            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3201        );
3202        let r = _mm_adds_epi8(a, b);
3203        #[rustfmt::skip]
3204        let e = _mm_setr_epi8(
3205            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3206        );
3207        assert_eq_m128i(r, e);
3208    }
3209
3210    #[simd_test(enable = "sse2")]
3211    unsafe fn test_mm_adds_epi8_saturate_positive() {
3212        let a = _mm_set1_epi8(0x7F);
3213        let b = _mm_set1_epi8(1);
3214        let r = _mm_adds_epi8(a, b);
3215        assert_eq_m128i(r, a);
3216    }
3217
3218    #[simd_test(enable = "sse2")]
3219    unsafe fn test_mm_adds_epi8_saturate_negative() {
3220        let a = _mm_set1_epi8(-0x80);
3221        let b = _mm_set1_epi8(-1);
3222        let r = _mm_adds_epi8(a, b);
3223        assert_eq_m128i(r, a);
3224    }
3225
3226    #[simd_test(enable = "sse2")]
3227    unsafe fn test_mm_adds_epi16() {
3228        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3229        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3230        let r = _mm_adds_epi16(a, b);
3231        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3232        assert_eq_m128i(r, e);
3233    }
3234
3235    #[simd_test(enable = "sse2")]
3236    unsafe fn test_mm_adds_epi16_saturate_positive() {
3237        let a = _mm_set1_epi16(0x7FFF);
3238        let b = _mm_set1_epi16(1);
3239        let r = _mm_adds_epi16(a, b);
3240        assert_eq_m128i(r, a);
3241    }
3242
3243    #[simd_test(enable = "sse2")]
3244    unsafe fn test_mm_adds_epi16_saturate_negative() {
3245        let a = _mm_set1_epi16(-0x8000);
3246        let b = _mm_set1_epi16(-1);
3247        let r = _mm_adds_epi16(a, b);
3248        assert_eq_m128i(r, a);
3249    }
3250
3251    #[simd_test(enable = "sse2")]
3252    unsafe fn test_mm_adds_epu8() {
3253        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3254        #[rustfmt::skip]
3255        let b = _mm_setr_epi8(
3256            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3257        );
3258        let r = _mm_adds_epu8(a, b);
3259        #[rustfmt::skip]
3260        let e = _mm_setr_epi8(
3261            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3262        );
3263        assert_eq_m128i(r, e);
3264    }
3265
3266    #[simd_test(enable = "sse2")]
3267    unsafe fn test_mm_adds_epu8_saturate() {
3268        let a = _mm_set1_epi8(!0);
3269        let b = _mm_set1_epi8(1);
3270        let r = _mm_adds_epu8(a, b);
3271        assert_eq_m128i(r, a);
3272    }
3273
3274    #[simd_test(enable = "sse2")]
3275    unsafe fn test_mm_adds_epu16() {
3276        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3277        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3278        let r = _mm_adds_epu16(a, b);
3279        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3280        assert_eq_m128i(r, e);
3281    }
3282
3283    #[simd_test(enable = "sse2")]
3284    unsafe fn test_mm_adds_epu16_saturate() {
3285        let a = _mm_set1_epi16(!0);
3286        let b = _mm_set1_epi16(1);
3287        let r = _mm_adds_epu16(a, b);
3288        assert_eq_m128i(r, a);
3289    }
3290
3291    #[simd_test(enable = "sse2")]
3292    unsafe fn test_mm_avg_epu8() {
3293        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3294        let r = _mm_avg_epu8(a, b);
3295        assert_eq_m128i(r, _mm_set1_epi8(6));
3296    }
3297
3298    #[simd_test(enable = "sse2")]
3299    unsafe fn test_mm_avg_epu16() {
3300        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3301        let r = _mm_avg_epu16(a, b);
3302        assert_eq_m128i(r, _mm_set1_epi16(6));
3303    }
3304
3305    #[simd_test(enable = "sse2")]
3306    unsafe fn test_mm_madd_epi16() {
3307        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3308        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3309        let r = _mm_madd_epi16(a, b);
3310        let e = _mm_setr_epi32(29, 81, 149, 233);
3311        assert_eq_m128i(r, e);
3312
3313        let a = _mm_setr_epi16(
3316            i16::MAX,
3317            i16::MAX,
3318            i16::MIN,
3319            i16::MIN,
3320            i16::MIN,
3321            i16::MAX,
3322            0,
3323            0,
3324        );
3325        let b = _mm_setr_epi16(
3326            i16::MAX,
3327            i16::MAX,
3328            i16::MIN,
3329            i16::MIN,
3330            i16::MAX,
3331            i16::MIN,
3332            0,
3333            0,
3334        );
3335        let r = _mm_madd_epi16(a, b);
3336        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3337        assert_eq_m128i(r, e);
3338    }
3339
3340    #[simd_test(enable = "sse2")]
3341    unsafe fn test_mm_max_epi16() {
3342        let a = _mm_set1_epi16(1);
3343        let b = _mm_set1_epi16(-1);
3344        let r = _mm_max_epi16(a, b);
3345        assert_eq_m128i(r, a);
3346    }
3347
3348    #[simd_test(enable = "sse2")]
3349    unsafe fn test_mm_max_epu8() {
3350        let a = _mm_set1_epi8(1);
3351        let b = _mm_set1_epi8(!0);
3352        let r = _mm_max_epu8(a, b);
3353        assert_eq_m128i(r, b);
3354    }
3355
3356    #[simd_test(enable = "sse2")]
3357    unsafe fn test_mm_min_epi16() {
3358        let a = _mm_set1_epi16(1);
3359        let b = _mm_set1_epi16(-1);
3360        let r = _mm_min_epi16(a, b);
3361        assert_eq_m128i(r, b);
3362    }
3363
3364    #[simd_test(enable = "sse2")]
3365    unsafe fn test_mm_min_epu8() {
3366        let a = _mm_set1_epi8(1);
3367        let b = _mm_set1_epi8(!0);
3368        let r = _mm_min_epu8(a, b);
3369        assert_eq_m128i(r, a);
3370    }
3371
3372    #[simd_test(enable = "sse2")]
3373    unsafe fn test_mm_mulhi_epi16() {
3374        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3375        let r = _mm_mulhi_epi16(a, b);
3376        assert_eq_m128i(r, _mm_set1_epi16(-16));
3377    }
3378
3379    #[simd_test(enable = "sse2")]
3380    unsafe fn test_mm_mulhi_epu16() {
3381        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3382        let r = _mm_mulhi_epu16(a, b);
3383        assert_eq_m128i(r, _mm_set1_epi16(15));
3384    }
3385
3386    #[simd_test(enable = "sse2")]
3387    unsafe fn test_mm_mullo_epi16() {
3388        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3389        let r = _mm_mullo_epi16(a, b);
3390        assert_eq_m128i(r, _mm_set1_epi16(-17960));
3391    }
3392
3393    #[simd_test(enable = "sse2")]
3394    unsafe fn test_mm_mul_epu32() {
3395        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3396        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3397        let r = _mm_mul_epu32(a, b);
3398        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3399        assert_eq_m128i(r, e);
3400    }
3401
3402    #[simd_test(enable = "sse2")]
3403    unsafe fn test_mm_sad_epu8() {
3404        #[rustfmt::skip]
3405        let a = _mm_setr_epi8(
3406            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3407            1, 2, 3, 4,
3408            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3409            1, 2, 3, 4,
3410        );
3411        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3412        let r = _mm_sad_epu8(a, b);
3413        let e = _mm_setr_epi64x(1020, 614);
3414        assert_eq_m128i(r, e);
3415    }
3416
3417    #[simd_test(enable = "sse2")]
3418    unsafe fn test_mm_sub_epi8() {
3419        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3420        let r = _mm_sub_epi8(a, b);
3421        assert_eq_m128i(r, _mm_set1_epi8(-1));
3422    }
3423
3424    #[simd_test(enable = "sse2")]
3425    unsafe fn test_mm_sub_epi16() {
3426        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3427        let r = _mm_sub_epi16(a, b);
3428        assert_eq_m128i(r, _mm_set1_epi16(-1));
3429    }
3430
3431    #[simd_test(enable = "sse2")]
3432    unsafe fn test_mm_sub_epi32() {
3433        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3434        let r = _mm_sub_epi32(a, b);
3435        assert_eq_m128i(r, _mm_set1_epi32(-1));
3436    }
3437
3438    #[simd_test(enable = "sse2")]
3439    unsafe fn test_mm_sub_epi64() {
3440        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3441        let r = _mm_sub_epi64(a, b);
3442        assert_eq_m128i(r, _mm_set1_epi64x(-1));
3443    }
3444
3445    #[simd_test(enable = "sse2")]
3446    unsafe fn test_mm_subs_epi8() {
3447        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3448        let r = _mm_subs_epi8(a, b);
3449        assert_eq_m128i(r, _mm_set1_epi8(3));
3450    }
3451
3452    #[simd_test(enable = "sse2")]
3453    unsafe fn test_mm_subs_epi8_saturate_positive() {
3454        let a = _mm_set1_epi8(0x7F);
3455        let b = _mm_set1_epi8(-1);
3456        let r = _mm_subs_epi8(a, b);
3457        assert_eq_m128i(r, a);
3458    }
3459
3460    #[simd_test(enable = "sse2")]
3461    unsafe fn test_mm_subs_epi8_saturate_negative() {
3462        let a = _mm_set1_epi8(-0x80);
3463        let b = _mm_set1_epi8(1);
3464        let r = _mm_subs_epi8(a, b);
3465        assert_eq_m128i(r, a);
3466    }
3467
3468    #[simd_test(enable = "sse2")]
3469    unsafe fn test_mm_subs_epi16() {
3470        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3471        let r = _mm_subs_epi16(a, b);
3472        assert_eq_m128i(r, _mm_set1_epi16(3));
3473    }
3474
3475    #[simd_test(enable = "sse2")]
3476    unsafe fn test_mm_subs_epi16_saturate_positive() {
3477        let a = _mm_set1_epi16(0x7FFF);
3478        let b = _mm_set1_epi16(-1);
3479        let r = _mm_subs_epi16(a, b);
3480        assert_eq_m128i(r, a);
3481    }
3482
3483    #[simd_test(enable = "sse2")]
3484    unsafe fn test_mm_subs_epi16_saturate_negative() {
3485        let a = _mm_set1_epi16(-0x8000);
3486        let b = _mm_set1_epi16(1);
3487        let r = _mm_subs_epi16(a, b);
3488        assert_eq_m128i(r, a);
3489    }
3490
3491    #[simd_test(enable = "sse2")]
3492    unsafe fn test_mm_subs_epu8() {
3493        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3494        let r = _mm_subs_epu8(a, b);
3495        assert_eq_m128i(r, _mm_set1_epi8(3));
3496    }
3497
3498    #[simd_test(enable = "sse2")]
3499    unsafe fn test_mm_subs_epu8_saturate() {
3500        let a = _mm_set1_epi8(0);
3501        let b = _mm_set1_epi8(1);
3502        let r = _mm_subs_epu8(a, b);
3503        assert_eq_m128i(r, a);
3504    }
3505
3506    #[simd_test(enable = "sse2")]
3507    unsafe fn test_mm_subs_epu16() {
3508        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3509        let r = _mm_subs_epu16(a, b);
3510        assert_eq_m128i(r, _mm_set1_epi16(3));
3511    }
3512
3513    #[simd_test(enable = "sse2")]
3514    unsafe fn test_mm_subs_epu16_saturate() {
3515        let a = _mm_set1_epi16(0);
3516        let b = _mm_set1_epi16(1);
3517        let r = _mm_subs_epu16(a, b);
3518        assert_eq_m128i(r, a);
3519    }
3520
3521    #[simd_test(enable = "sse2")]
3522    unsafe fn test_mm_slli_si128() {
3523        #[rustfmt::skip]
3524        let a = _mm_setr_epi8(
3525            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3526        );
3527        let r = _mm_slli_si128::<1>(a);
3528        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3529        assert_eq_m128i(r, e);
3530
3531        #[rustfmt::skip]
3532        let a = _mm_setr_epi8(
3533            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3534        );
3535        let r = _mm_slli_si128::<15>(a);
3536        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3537        assert_eq_m128i(r, e);
3538
3539        #[rustfmt::skip]
3540        let a = _mm_setr_epi8(
3541            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3542        );
3543        let r = _mm_slli_si128::<16>(a);
3544        assert_eq_m128i(r, _mm_set1_epi8(0));
3545    }
3546
3547    #[simd_test(enable = "sse2")]
3548    unsafe fn test_mm_slli_epi16() {
3549        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3550        let r = _mm_slli_epi16::<4>(a);
3551        assert_eq_m128i(
3552            r,
3553            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3554        );
3555        let r = _mm_slli_epi16::<16>(a);
3556        assert_eq_m128i(r, _mm_set1_epi16(0));
3557    }
3558
3559    #[simd_test(enable = "sse2")]
3560    unsafe fn test_mm_sll_epi16() {
3561        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3562        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3563        assert_eq_m128i(
3564            r,
3565            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3566        );
3567        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3568        assert_eq_m128i(r, a);
3569        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3570        assert_eq_m128i(r, _mm_set1_epi16(0));
3571        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3572        assert_eq_m128i(r, _mm_set1_epi16(0));
3573    }
3574
3575    #[simd_test(enable = "sse2")]
3576    unsafe fn test_mm_slli_epi32() {
3577        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3578        let r = _mm_slli_epi32::<4>(a);
3579        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3580        let r = _mm_slli_epi32::<32>(a);
3581        assert_eq_m128i(r, _mm_set1_epi32(0));
3582    }
3583
3584    #[simd_test(enable = "sse2")]
3585    unsafe fn test_mm_sll_epi32() {
3586        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3587        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3588        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3589        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3590        assert_eq_m128i(r, a);
3591        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3592        assert_eq_m128i(r, _mm_set1_epi32(0));
3593        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3594        assert_eq_m128i(r, _mm_set1_epi32(0));
3595    }
3596
3597    #[simd_test(enable = "sse2")]
3598    unsafe fn test_mm_slli_epi64() {
3599        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3600        let r = _mm_slli_epi64::<4>(a);
3601        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3602        let r = _mm_slli_epi64::<64>(a);
3603        assert_eq_m128i(r, _mm_set1_epi64x(0));
3604    }
3605
3606    #[simd_test(enable = "sse2")]
3607    unsafe fn test_mm_sll_epi64() {
3608        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3609        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3610        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3611        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3612        assert_eq_m128i(r, a);
3613        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3614        assert_eq_m128i(r, _mm_set1_epi64x(0));
3615        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3616        assert_eq_m128i(r, _mm_set1_epi64x(0));
3617    }
3618
3619    #[simd_test(enable = "sse2")]
3620    unsafe fn test_mm_srai_epi16() {
3621        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3622        let r = _mm_srai_epi16::<4>(a);
3623        assert_eq_m128i(
3624            r,
3625            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3626        );
3627        let r = _mm_srai_epi16::<16>(a);
3628        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3629    }
3630
3631    #[simd_test(enable = "sse2")]
3632    unsafe fn test_mm_sra_epi16() {
3633        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3634        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3635        assert_eq_m128i(
3636            r,
3637            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3638        );
3639        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3640        assert_eq_m128i(r, a);
3641        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3642        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3643        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3644        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3645    }
3646
3647    #[simd_test(enable = "sse2")]
3648    unsafe fn test_mm_srai_epi32() {
3649        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3650        let r = _mm_srai_epi32::<4>(a);
3651        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3652        let r = _mm_srai_epi32::<32>(a);
3653        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3654    }
3655
3656    #[simd_test(enable = "sse2")]
3657    unsafe fn test_mm_sra_epi32() {
3658        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3659        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3660        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3661        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3662        assert_eq_m128i(r, a);
3663        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3664        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3665        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3666        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3667    }
3668
3669    #[simd_test(enable = "sse2")]
3670    unsafe fn test_mm_srli_si128() {
3671        #[rustfmt::skip]
3672        let a = _mm_setr_epi8(
3673            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3674        );
3675        let r = _mm_srli_si128::<1>(a);
3676        #[rustfmt::skip]
3677        let e = _mm_setr_epi8(
3678            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3679        );
3680        assert_eq_m128i(r, e);
3681
3682        #[rustfmt::skip]
3683        let a = _mm_setr_epi8(
3684            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3685        );
3686        let r = _mm_srli_si128::<15>(a);
3687        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3688        assert_eq_m128i(r, e);
3689
3690        #[rustfmt::skip]
3691        let a = _mm_setr_epi8(
3692            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3693        );
3694        let r = _mm_srli_si128::<16>(a);
3695        assert_eq_m128i(r, _mm_set1_epi8(0));
3696    }
3697
3698    #[simd_test(enable = "sse2")]
3699    unsafe fn test_mm_srli_epi16() {
3700        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3701        let r = _mm_srli_epi16::<4>(a);
3702        assert_eq_m128i(
3703            r,
3704            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3705        );
3706        let r = _mm_srli_epi16::<16>(a);
3707        assert_eq_m128i(r, _mm_set1_epi16(0));
3708    }
3709
3710    #[simd_test(enable = "sse2")]
3711    unsafe fn test_mm_srl_epi16() {
3712        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3713        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3714        assert_eq_m128i(
3715            r,
3716            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3717        );
3718        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3719        assert_eq_m128i(r, a);
3720        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3721        assert_eq_m128i(r, _mm_set1_epi16(0));
3722        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3723        assert_eq_m128i(r, _mm_set1_epi16(0));
3724    }
3725
3726    #[simd_test(enable = "sse2")]
3727    unsafe fn test_mm_srli_epi32() {
3728        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3729        let r = _mm_srli_epi32::<4>(a);
3730        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3731        let r = _mm_srli_epi32::<32>(a);
3732        assert_eq_m128i(r, _mm_set1_epi32(0));
3733    }
3734
3735    #[simd_test(enable = "sse2")]
3736    unsafe fn test_mm_srl_epi32() {
3737        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3738        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3739        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3740        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3741        assert_eq_m128i(r, a);
3742        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3743        assert_eq_m128i(r, _mm_set1_epi32(0));
3744        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3745        assert_eq_m128i(r, _mm_set1_epi32(0));
3746    }
3747
3748    #[simd_test(enable = "sse2")]
3749    unsafe fn test_mm_srli_epi64() {
3750        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3751        let r = _mm_srli_epi64::<4>(a);
3752        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3753        let r = _mm_srli_epi64::<64>(a);
3754        assert_eq_m128i(r, _mm_set1_epi64x(0));
3755    }
3756
3757    #[simd_test(enable = "sse2")]
3758    unsafe fn test_mm_srl_epi64() {
3759        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3760        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3761        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3762        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3763        assert_eq_m128i(r, a);
3764        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3765        assert_eq_m128i(r, _mm_set1_epi64x(0));
3766        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3767        assert_eq_m128i(r, _mm_set1_epi64x(0));
3768    }
3769
3770    #[simd_test(enable = "sse2")]
3771    unsafe fn test_mm_and_si128() {
3772        let a = _mm_set1_epi8(5);
3773        let b = _mm_set1_epi8(3);
3774        let r = _mm_and_si128(a, b);
3775        assert_eq_m128i(r, _mm_set1_epi8(1));
3776    }
3777
3778    #[simd_test(enable = "sse2")]
3779    unsafe fn test_mm_andnot_si128() {
3780        let a = _mm_set1_epi8(5);
3781        let b = _mm_set1_epi8(3);
3782        let r = _mm_andnot_si128(a, b);
3783        assert_eq_m128i(r, _mm_set1_epi8(2));
3784    }
3785
3786    #[simd_test(enable = "sse2")]
3787    unsafe fn test_mm_or_si128() {
3788        let a = _mm_set1_epi8(5);
3789        let b = _mm_set1_epi8(3);
3790        let r = _mm_or_si128(a, b);
3791        assert_eq_m128i(r, _mm_set1_epi8(7));
3792    }
3793
3794    #[simd_test(enable = "sse2")]
3795    unsafe fn test_mm_xor_si128() {
3796        let a = _mm_set1_epi8(5);
3797        let b = _mm_set1_epi8(3);
3798        let r = _mm_xor_si128(a, b);
3799        assert_eq_m128i(r, _mm_set1_epi8(6));
3800    }
3801
3802    #[simd_test(enable = "sse2")]
3803    unsafe fn test_mm_cmpeq_epi8() {
3804        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3805        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3806        let r = _mm_cmpeq_epi8(a, b);
3807        #[rustfmt::skip]
3808        assert_eq_m128i(
3809            r,
3810            _mm_setr_epi8(
3811                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3812            )
3813        );
3814    }
3815
3816    #[simd_test(enable = "sse2")]
3817    unsafe fn test_mm_cmpeq_epi16() {
3818        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3819        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3820        let r = _mm_cmpeq_epi16(a, b);
3821        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3822    }
3823
3824    #[simd_test(enable = "sse2")]
3825    unsafe fn test_mm_cmpeq_epi32() {
3826        let a = _mm_setr_epi32(0, 1, 2, 3);
3827        let b = _mm_setr_epi32(3, 2, 2, 0);
3828        let r = _mm_cmpeq_epi32(a, b);
3829        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3830    }
3831
3832    #[simd_test(enable = "sse2")]
3833    unsafe fn test_mm_cmpgt_epi8() {
3834        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3835        let b = _mm_set1_epi8(0);
3836        let r = _mm_cmpgt_epi8(a, b);
3837        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3838        assert_eq_m128i(r, e);
3839    }
3840
3841    #[simd_test(enable = "sse2")]
3842    unsafe fn test_mm_cmpgt_epi16() {
3843        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3844        let b = _mm_set1_epi16(0);
3845        let r = _mm_cmpgt_epi16(a, b);
3846        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3847        assert_eq_m128i(r, e);
3848    }
3849
3850    #[simd_test(enable = "sse2")]
3851    unsafe fn test_mm_cmpgt_epi32() {
3852        let a = _mm_set_epi32(5, 0, 0, 0);
3853        let b = _mm_set1_epi32(0);
3854        let r = _mm_cmpgt_epi32(a, b);
3855        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3856    }
3857
3858    #[simd_test(enable = "sse2")]
3859    unsafe fn test_mm_cmplt_epi8() {
3860        let a = _mm_set1_epi8(0);
3861        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3862        let r = _mm_cmplt_epi8(a, b);
3863        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3864        assert_eq_m128i(r, e);
3865    }
3866
3867    #[simd_test(enable = "sse2")]
3868    unsafe fn test_mm_cmplt_epi16() {
3869        let a = _mm_set1_epi16(0);
3870        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3871        let r = _mm_cmplt_epi16(a, b);
3872        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3873        assert_eq_m128i(r, e);
3874    }
3875
3876    #[simd_test(enable = "sse2")]
3877    unsafe fn test_mm_cmplt_epi32() {
3878        let a = _mm_set1_epi32(0);
3879        let b = _mm_set_epi32(5, 0, 0, 0);
3880        let r = _mm_cmplt_epi32(a, b);
3881        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3882    }
3883
3884    #[simd_test(enable = "sse2")]
3885    unsafe fn test_mm_cvtepi32_pd() {
3886        let a = _mm_set_epi32(35, 25, 15, 5);
3887        let r = _mm_cvtepi32_pd(a);
3888        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3889    }
3890
3891    #[simd_test(enable = "sse2")]
3892    unsafe fn test_mm_cvtsi32_sd() {
3893        let a = _mm_set1_pd(3.5);
3894        let r = _mm_cvtsi32_sd(a, 5);
3895        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3896    }
3897
3898    #[simd_test(enable = "sse2")]
3899    unsafe fn test_mm_cvtepi32_ps() {
3900        let a = _mm_setr_epi32(1, 2, 3, 4);
3901        let r = _mm_cvtepi32_ps(a);
3902        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3903    }
3904
3905    #[simd_test(enable = "sse2")]
3906    unsafe fn test_mm_cvtps_epi32() {
3907        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3908        let r = _mm_cvtps_epi32(a);
3909        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3910    }
3911
3912    #[simd_test(enable = "sse2")]
3913    unsafe fn test_mm_cvtsi32_si128() {
3914        let r = _mm_cvtsi32_si128(5);
3915        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3916    }
3917
3918    #[simd_test(enable = "sse2")]
3919    unsafe fn test_mm_cvtsi128_si32() {
3920        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3921        assert_eq!(r, 5);
3922    }
3923
3924    #[simd_test(enable = "sse2")]
3925    unsafe fn test_mm_set_epi64x() {
3926        let r = _mm_set_epi64x(0, 1);
3927        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3928    }
3929
3930    #[simd_test(enable = "sse2")]
3931    unsafe fn test_mm_set_epi32() {
3932        let r = _mm_set_epi32(0, 1, 2, 3);
3933        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3934    }
3935
3936    #[simd_test(enable = "sse2")]
3937    unsafe fn test_mm_set_epi16() {
3938        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3939        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3940    }
3941
3942    #[simd_test(enable = "sse2")]
3943    unsafe fn test_mm_set_epi8() {
3944        #[rustfmt::skip]
3945        let r = _mm_set_epi8(
3946            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3947        );
3948        #[rustfmt::skip]
3949        let e = _mm_setr_epi8(
3950            15, 14, 13, 12, 11, 10, 9, 8,
3951            7, 6, 5, 4, 3, 2, 1, 0,
3952        );
3953        assert_eq_m128i(r, e);
3954    }
3955
3956    #[simd_test(enable = "sse2")]
3957    unsafe fn test_mm_set1_epi64x() {
3958        let r = _mm_set1_epi64x(1);
3959        assert_eq_m128i(r, _mm_set1_epi64x(1));
3960    }
3961
3962    #[simd_test(enable = "sse2")]
3963    unsafe fn test_mm_set1_epi32() {
3964        let r = _mm_set1_epi32(1);
3965        assert_eq_m128i(r, _mm_set1_epi32(1));
3966    }
3967
3968    #[simd_test(enable = "sse2")]
3969    unsafe fn test_mm_set1_epi16() {
3970        let r = _mm_set1_epi16(1);
3971        assert_eq_m128i(r, _mm_set1_epi16(1));
3972    }
3973
3974    #[simd_test(enable = "sse2")]
3975    unsafe fn test_mm_set1_epi8() {
3976        let r = _mm_set1_epi8(1);
3977        assert_eq_m128i(r, _mm_set1_epi8(1));
3978    }
3979
3980    #[simd_test(enable = "sse2")]
3981    unsafe fn test_mm_setr_epi32() {
3982        let r = _mm_setr_epi32(0, 1, 2, 3);
3983        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3984    }
3985
3986    #[simd_test(enable = "sse2")]
3987    unsafe fn test_mm_setr_epi16() {
3988        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3989        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3990    }
3991
3992    #[simd_test(enable = "sse2")]
3993    unsafe fn test_mm_setr_epi8() {
3994        #[rustfmt::skip]
3995        let r = _mm_setr_epi8(
3996            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3997        );
3998        #[rustfmt::skip]
3999        let e = _mm_setr_epi8(
4000            0, 1, 2, 3, 4, 5, 6, 7,
4001            8, 9, 10, 11, 12, 13, 14, 15,
4002        );
4003        assert_eq_m128i(r, e);
4004    }
4005
4006    #[simd_test(enable = "sse2")]
4007    unsafe fn test_mm_setzero_si128() {
4008        let r = _mm_setzero_si128();
4009        assert_eq_m128i(r, _mm_set1_epi64x(0));
4010    }
4011
4012    #[simd_test(enable = "sse2")]
4013    unsafe fn test_mm_loadl_epi64() {
4014        let a = _mm_setr_epi64x(6, 5);
4015        let r = _mm_loadl_epi64(ptr::addr_of!(a));
4016        assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4017    }
4018
4019    #[simd_test(enable = "sse2")]
4020    unsafe fn test_mm_load_si128() {
4021        let a = _mm_set_epi64x(5, 6);
4022        let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4023        assert_eq_m128i(a, r);
4024    }
4025
4026    #[simd_test(enable = "sse2")]
4027    unsafe fn test_mm_loadu_si128() {
4028        let a = _mm_set_epi64x(5, 6);
4029        let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4030        assert_eq_m128i(a, r);
4031    }
4032
4033    #[simd_test(enable = "sse2")]
4034    #[cfg_attr(miri, ignore)]
4037    unsafe fn test_mm_maskmoveu_si128() {
4038        let a = _mm_set1_epi8(9);
4039        #[rustfmt::skip]
4040        let mask = _mm_set_epi8(
4041            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4042            0, 0, 0, 0, 0, 0, 0, 0,
4043        );
4044        let mut r = _mm_set1_epi8(0);
4045        _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4046        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4047        assert_eq_m128i(r, e);
4048    }
4049
4050    #[simd_test(enable = "sse2")]
4051    unsafe fn test_mm_store_si128() {
4052        let a = _mm_set1_epi8(9);
4053        let mut r = _mm_set1_epi8(0);
4054        _mm_store_si128(&mut r, a);
4055        assert_eq_m128i(r, a);
4056    }
4057
4058    #[simd_test(enable = "sse2")]
4059    unsafe fn test_mm_storeu_si128() {
4060        let a = _mm_set1_epi8(9);
4061        let mut r = _mm_set1_epi8(0);
4062        _mm_storeu_si128(&mut r, a);
4063        assert_eq_m128i(r, a);
4064    }
4065
4066    #[simd_test(enable = "sse2")]
4067    unsafe fn test_mm_storel_epi64() {
4068        let a = _mm_setr_epi64x(2, 9);
4069        let mut r = _mm_set1_epi8(0);
4070        _mm_storel_epi64(&mut r, a);
4071        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4072    }
4073
4074    #[simd_test(enable = "sse2")]
4075    #[cfg_attr(miri, ignore)]
4078    unsafe fn test_mm_stream_si128() {
4079        let a = _mm_setr_epi32(1, 2, 3, 4);
4080        let mut r = _mm_undefined_si128();
4081        _mm_stream_si128(ptr::addr_of_mut!(r), a);
4082        assert_eq_m128i(r, a);
4083    }
4084
4085    #[simd_test(enable = "sse2")]
4086    #[cfg_attr(miri, ignore)]
4089    unsafe fn test_mm_stream_si32() {
4090        let a: i32 = 7;
4091        let mut mem = boxed::Box::<i32>::new(-1);
4092        _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4093        assert_eq!(a, *mem);
4094    }
4095
4096    #[simd_test(enable = "sse2")]
4097    unsafe fn test_mm_move_epi64() {
4098        let a = _mm_setr_epi64x(5, 6);
4099        let r = _mm_move_epi64(a);
4100        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4101    }
4102
4103    #[simd_test(enable = "sse2")]
4104    unsafe fn test_mm_packs_epi16() {
4105        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4106        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4107        let r = _mm_packs_epi16(a, b);
4108        #[rustfmt::skip]
4109        assert_eq_m128i(
4110            r,
4111            _mm_setr_epi8(
4112                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4113            )
4114        );
4115    }
4116
4117    #[simd_test(enable = "sse2")]
4118    unsafe fn test_mm_packs_epi32() {
4119        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4120        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4121        let r = _mm_packs_epi32(a, b);
4122        assert_eq_m128i(
4123            r,
4124            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4125        );
4126    }
4127
4128    #[simd_test(enable = "sse2")]
4129    unsafe fn test_mm_packus_epi16() {
4130        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4131        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4132        let r = _mm_packus_epi16(a, b);
4133        assert_eq_m128i(
4134            r,
4135            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4136        );
4137    }
4138
4139    #[simd_test(enable = "sse2")]
4140    unsafe fn test_mm_extract_epi16() {
4141        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4142        let r1 = _mm_extract_epi16::<0>(a);
4143        let r2 = _mm_extract_epi16::<3>(a);
4144        assert_eq!(r1, 0xFFFF);
4145        assert_eq!(r2, 3);
4146    }
4147
4148    #[simd_test(enable = "sse2")]
4149    unsafe fn test_mm_insert_epi16() {
4150        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4151        let r = _mm_insert_epi16::<0>(a, 9);
4152        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4153        assert_eq_m128i(r, e);
4154    }
4155
4156    #[simd_test(enable = "sse2")]
4157    unsafe fn test_mm_movemask_epi8() {
4158        #[rustfmt::skip]
4159        let a = _mm_setr_epi8(
4160            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4161            0b0101, 0b1111_0000u8 as i8, 0, 0,
4162            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4163            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4164        );
4165        let r = _mm_movemask_epi8(a);
4166        assert_eq!(r, 0b10100110_00100101);
4167    }
4168
4169    #[simd_test(enable = "sse2")]
4170    unsafe fn test_mm_shuffle_epi32() {
4171        let a = _mm_setr_epi32(5, 10, 15, 20);
4172        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4173        let e = _mm_setr_epi32(20, 10, 10, 5);
4174        assert_eq_m128i(r, e);
4175    }
4176
4177    #[simd_test(enable = "sse2")]
4178    unsafe fn test_mm_shufflehi_epi16() {
4179        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4180        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4181        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4182        assert_eq_m128i(r, e);
4183    }
4184
4185    #[simd_test(enable = "sse2")]
4186    unsafe fn test_mm_shufflelo_epi16() {
4187        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4188        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4189        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4190        assert_eq_m128i(r, e);
4191    }
4192
4193    #[simd_test(enable = "sse2")]
4194    unsafe fn test_mm_unpackhi_epi8() {
4195        #[rustfmt::skip]
4196        let a = _mm_setr_epi8(
4197            0, 1, 2, 3, 4, 5, 6, 7,
4198            8, 9, 10, 11, 12, 13, 14, 15,
4199        );
4200        #[rustfmt::skip]
4201        let b = _mm_setr_epi8(
4202            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4203        );
4204        let r = _mm_unpackhi_epi8(a, b);
4205        #[rustfmt::skip]
4206        let e = _mm_setr_epi8(
4207            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4208        );
4209        assert_eq_m128i(r, e);
4210    }
4211
4212    #[simd_test(enable = "sse2")]
4213    unsafe fn test_mm_unpackhi_epi16() {
4214        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4215        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4216        let r = _mm_unpackhi_epi16(a, b);
4217        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4218        assert_eq_m128i(r, e);
4219    }
4220
4221    #[simd_test(enable = "sse2")]
4222    unsafe fn test_mm_unpackhi_epi32() {
4223        let a = _mm_setr_epi32(0, 1, 2, 3);
4224        let b = _mm_setr_epi32(4, 5, 6, 7);
4225        let r = _mm_unpackhi_epi32(a, b);
4226        let e = _mm_setr_epi32(2, 6, 3, 7);
4227        assert_eq_m128i(r, e);
4228    }
4229
4230    #[simd_test(enable = "sse2")]
4231    unsafe fn test_mm_unpackhi_epi64() {
4232        let a = _mm_setr_epi64x(0, 1);
4233        let b = _mm_setr_epi64x(2, 3);
4234        let r = _mm_unpackhi_epi64(a, b);
4235        let e = _mm_setr_epi64x(1, 3);
4236        assert_eq_m128i(r, e);
4237    }
4238
4239    #[simd_test(enable = "sse2")]
4240    unsafe fn test_mm_unpacklo_epi8() {
4241        #[rustfmt::skip]
4242        let a = _mm_setr_epi8(
4243            0, 1, 2, 3, 4, 5, 6, 7,
4244            8, 9, 10, 11, 12, 13, 14, 15,
4245        );
4246        #[rustfmt::skip]
4247        let b = _mm_setr_epi8(
4248            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4249        );
4250        let r = _mm_unpacklo_epi8(a, b);
4251        #[rustfmt::skip]
4252        let e = _mm_setr_epi8(
4253            0, 16, 1, 17, 2, 18, 3, 19,
4254            4, 20, 5, 21, 6, 22, 7, 23,
4255        );
4256        assert_eq_m128i(r, e);
4257    }
4258
4259    #[simd_test(enable = "sse2")]
4260    unsafe fn test_mm_unpacklo_epi16() {
4261        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4262        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4263        let r = _mm_unpacklo_epi16(a, b);
4264        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4265        assert_eq_m128i(r, e);
4266    }
4267
4268    #[simd_test(enable = "sse2")]
4269    unsafe fn test_mm_unpacklo_epi32() {
4270        let a = _mm_setr_epi32(0, 1, 2, 3);
4271        let b = _mm_setr_epi32(4, 5, 6, 7);
4272        let r = _mm_unpacklo_epi32(a, b);
4273        let e = _mm_setr_epi32(0, 4, 1, 5);
4274        assert_eq_m128i(r, e);
4275    }
4276
4277    #[simd_test(enable = "sse2")]
4278    unsafe fn test_mm_unpacklo_epi64() {
4279        let a = _mm_setr_epi64x(0, 1);
4280        let b = _mm_setr_epi64x(2, 3);
4281        let r = _mm_unpacklo_epi64(a, b);
4282        let e = _mm_setr_epi64x(0, 2);
4283        assert_eq_m128i(r, e);
4284    }
4285
4286    #[simd_test(enable = "sse2")]
4287    unsafe fn test_mm_add_sd() {
4288        let a = _mm_setr_pd(1.0, 2.0);
4289        let b = _mm_setr_pd(5.0, 10.0);
4290        let r = _mm_add_sd(a, b);
4291        assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4292    }
4293
4294    #[simd_test(enable = "sse2")]
4295    unsafe fn test_mm_add_pd() {
4296        let a = _mm_setr_pd(1.0, 2.0);
4297        let b = _mm_setr_pd(5.0, 10.0);
4298        let r = _mm_add_pd(a, b);
4299        assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4300    }
4301
4302    #[simd_test(enable = "sse2")]
4303    unsafe fn test_mm_div_sd() {
4304        let a = _mm_setr_pd(1.0, 2.0);
4305        let b = _mm_setr_pd(5.0, 10.0);
4306        let r = _mm_div_sd(a, b);
4307        assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4308    }
4309
4310    #[simd_test(enable = "sse2")]
4311    unsafe fn test_mm_div_pd() {
4312        let a = _mm_setr_pd(1.0, 2.0);
4313        let b = _mm_setr_pd(5.0, 10.0);
4314        let r = _mm_div_pd(a, b);
4315        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4316    }
4317
4318    #[simd_test(enable = "sse2")]
4319    unsafe fn test_mm_max_sd() {
4320        let a = _mm_setr_pd(1.0, 2.0);
4321        let b = _mm_setr_pd(5.0, 10.0);
4322        let r = _mm_max_sd(a, b);
4323        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4324    }
4325
4326    #[simd_test(enable = "sse2")]
4327    unsafe fn test_mm_max_pd() {
4328        let a = _mm_setr_pd(1.0, 2.0);
4329        let b = _mm_setr_pd(5.0, 10.0);
4330        let r = _mm_max_pd(a, b);
4331        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4332
4333        let a = _mm_setr_pd(-0.0, 0.0);
4335        let b = _mm_setr_pd(0.0, 0.0);
4336        let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4337        let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4338        let a: [u8; 16] = transmute(a);
4339        let b: [u8; 16] = transmute(b);
4340        assert_eq!(r1, b);
4341        assert_eq!(r2, a);
4342        assert_ne!(a, b); }
4344
4345    #[simd_test(enable = "sse2")]
4346    unsafe fn test_mm_min_sd() {
4347        let a = _mm_setr_pd(1.0, 2.0);
4348        let b = _mm_setr_pd(5.0, 10.0);
4349        let r = _mm_min_sd(a, b);
4350        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4351    }
4352
4353    #[simd_test(enable = "sse2")]
4354    unsafe fn test_mm_min_pd() {
4355        let a = _mm_setr_pd(1.0, 2.0);
4356        let b = _mm_setr_pd(5.0, 10.0);
4357        let r = _mm_min_pd(a, b);
4358        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4359
4360        let a = _mm_setr_pd(-0.0, 0.0);
4362        let b = _mm_setr_pd(0.0, 0.0);
4363        let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4364        let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4365        let a: [u8; 16] = transmute(a);
4366        let b: [u8; 16] = transmute(b);
4367        assert_eq!(r1, b);
4368        assert_eq!(r2, a);
4369        assert_ne!(a, b); }
4371
4372    #[simd_test(enable = "sse2")]
4373    unsafe fn test_mm_mul_sd() {
4374        let a = _mm_setr_pd(1.0, 2.0);
4375        let b = _mm_setr_pd(5.0, 10.0);
4376        let r = _mm_mul_sd(a, b);
4377        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4378    }
4379
4380    #[simd_test(enable = "sse2")]
4381    unsafe fn test_mm_mul_pd() {
4382        let a = _mm_setr_pd(1.0, 2.0);
4383        let b = _mm_setr_pd(5.0, 10.0);
4384        let r = _mm_mul_pd(a, b);
4385        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4386    }
4387
4388    #[simd_test(enable = "sse2")]
4389    unsafe fn test_mm_sqrt_sd() {
4390        let a = _mm_setr_pd(1.0, 2.0);
4391        let b = _mm_setr_pd(5.0, 10.0);
4392        let r = _mm_sqrt_sd(a, b);
4393        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4394    }
4395
4396    #[simd_test(enable = "sse2")]
4397    unsafe fn test_mm_sqrt_pd() {
4398        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4399        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4400    }
4401
4402    #[simd_test(enable = "sse2")]
4403    unsafe fn test_mm_sub_sd() {
4404        let a = _mm_setr_pd(1.0, 2.0);
4405        let b = _mm_setr_pd(5.0, 10.0);
4406        let r = _mm_sub_sd(a, b);
4407        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4408    }
4409
4410    #[simd_test(enable = "sse2")]
4411    unsafe fn test_mm_sub_pd() {
4412        let a = _mm_setr_pd(1.0, 2.0);
4413        let b = _mm_setr_pd(5.0, 10.0);
4414        let r = _mm_sub_pd(a, b);
4415        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4416    }
4417
4418    #[simd_test(enable = "sse2")]
4419    unsafe fn test_mm_and_pd() {
4420        let a = transmute(u64x2::splat(5));
4421        let b = transmute(u64x2::splat(3));
4422        let r = _mm_and_pd(a, b);
4423        let e = transmute(u64x2::splat(1));
4424        assert_eq_m128d(r, e);
4425    }
4426
4427    #[simd_test(enable = "sse2")]
4428    unsafe fn test_mm_andnot_pd() {
4429        let a = transmute(u64x2::splat(5));
4430        let b = transmute(u64x2::splat(3));
4431        let r = _mm_andnot_pd(a, b);
4432        let e = transmute(u64x2::splat(2));
4433        assert_eq_m128d(r, e);
4434    }
4435
4436    #[simd_test(enable = "sse2")]
4437    unsafe fn test_mm_or_pd() {
4438        let a = transmute(u64x2::splat(5));
4439        let b = transmute(u64x2::splat(3));
4440        let r = _mm_or_pd(a, b);
4441        let e = transmute(u64x2::splat(7));
4442        assert_eq_m128d(r, e);
4443    }
4444
4445    #[simd_test(enable = "sse2")]
4446    unsafe fn test_mm_xor_pd() {
4447        let a = transmute(u64x2::splat(5));
4448        let b = transmute(u64x2::splat(3));
4449        let r = _mm_xor_pd(a, b);
4450        let e = transmute(u64x2::splat(6));
4451        assert_eq_m128d(r, e);
4452    }
4453
4454    #[simd_test(enable = "sse2")]
4455    unsafe fn test_mm_cmpeq_sd() {
4456        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4457        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4458        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4459        assert_eq_m128i(r, e);
4460    }
4461
4462    #[simd_test(enable = "sse2")]
4463    unsafe fn test_mm_cmplt_sd() {
4464        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4465        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4466        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4467        assert_eq_m128i(r, e);
4468    }
4469
4470    #[simd_test(enable = "sse2")]
4471    unsafe fn test_mm_cmple_sd() {
4472        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4473        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4474        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4475        assert_eq_m128i(r, e);
4476    }
4477
4478    #[simd_test(enable = "sse2")]
4479    unsafe fn test_mm_cmpgt_sd() {
4480        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4481        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4482        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4483        assert_eq_m128i(r, e);
4484    }
4485
4486    #[simd_test(enable = "sse2")]
4487    unsafe fn test_mm_cmpge_sd() {
4488        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4489        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4490        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4491        assert_eq_m128i(r, e);
4492    }
4493
4494    #[simd_test(enable = "sse2")]
4495    unsafe fn test_mm_cmpord_sd() {
4496        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4497        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4498        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4499        assert_eq_m128i(r, e);
4500    }
4501
4502    #[simd_test(enable = "sse2")]
4503    unsafe fn test_mm_cmpunord_sd() {
4504        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4505        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4506        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4507        assert_eq_m128i(r, e);
4508    }
4509
4510    #[simd_test(enable = "sse2")]
4511    unsafe fn test_mm_cmpneq_sd() {
4512        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4513        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4514        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4515        assert_eq_m128i(r, e);
4516    }
4517
4518    #[simd_test(enable = "sse2")]
4519    unsafe fn test_mm_cmpnlt_sd() {
4520        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4521        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4522        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4523        assert_eq_m128i(r, e);
4524    }
4525
4526    #[simd_test(enable = "sse2")]
4527    unsafe fn test_mm_cmpnle_sd() {
4528        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4529        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4530        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4531        assert_eq_m128i(r, e);
4532    }
4533
4534    #[simd_test(enable = "sse2")]
4535    unsafe fn test_mm_cmpngt_sd() {
4536        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4537        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4538        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4539        assert_eq_m128i(r, e);
4540    }
4541
4542    #[simd_test(enable = "sse2")]
4543    unsafe fn test_mm_cmpnge_sd() {
4544        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4545        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4546        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4547        assert_eq_m128i(r, e);
4548    }
4549
4550    #[simd_test(enable = "sse2")]
4551    unsafe fn test_mm_cmpeq_pd() {
4552        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4553        let e = _mm_setr_epi64x(!0, 0);
4554        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4555        assert_eq_m128i(r, e);
4556    }
4557
4558    #[simd_test(enable = "sse2")]
4559    unsafe fn test_mm_cmplt_pd() {
4560        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4561        let e = _mm_setr_epi64x(0, !0);
4562        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4563        assert_eq_m128i(r, e);
4564    }
4565
4566    #[simd_test(enable = "sse2")]
4567    unsafe fn test_mm_cmple_pd() {
4568        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4569        let e = _mm_setr_epi64x(!0, !0);
4570        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4571        assert_eq_m128i(r, e);
4572    }
4573
4574    #[simd_test(enable = "sse2")]
4575    unsafe fn test_mm_cmpgt_pd() {
4576        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4577        let e = _mm_setr_epi64x(0, 0);
4578        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4579        assert_eq_m128i(r, e);
4580    }
4581
4582    #[simd_test(enable = "sse2")]
4583    unsafe fn test_mm_cmpge_pd() {
4584        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4585        let e = _mm_setr_epi64x(!0, 0);
4586        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4587        assert_eq_m128i(r, e);
4588    }
4589
4590    #[simd_test(enable = "sse2")]
4591    unsafe fn test_mm_cmpord_pd() {
4592        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4593        let e = _mm_setr_epi64x(0, !0);
4594        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4595        assert_eq_m128i(r, e);
4596    }
4597
4598    #[simd_test(enable = "sse2")]
4599    unsafe fn test_mm_cmpunord_pd() {
4600        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4601        let e = _mm_setr_epi64x(!0, 0);
4602        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4603        assert_eq_m128i(r, e);
4604    }
4605
4606    #[simd_test(enable = "sse2")]
4607    unsafe fn test_mm_cmpneq_pd() {
4608        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4609        let e = _mm_setr_epi64x(!0, !0);
4610        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4611        assert_eq_m128i(r, e);
4612    }
4613
4614    #[simd_test(enable = "sse2")]
4615    unsafe fn test_mm_cmpnlt_pd() {
4616        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4617        let e = _mm_setr_epi64x(0, 0);
4618        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4619        assert_eq_m128i(r, e);
4620    }
4621
4622    #[simd_test(enable = "sse2")]
4623    unsafe fn test_mm_cmpnle_pd() {
4624        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4625        let e = _mm_setr_epi64x(0, 0);
4626        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4627        assert_eq_m128i(r, e);
4628    }
4629
4630    #[simd_test(enable = "sse2")]
4631    unsafe fn test_mm_cmpngt_pd() {
4632        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4633        let e = _mm_setr_epi64x(0, !0);
4634        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4635        assert_eq_m128i(r, e);
4636    }
4637
4638    #[simd_test(enable = "sse2")]
4639    unsafe fn test_mm_cmpnge_pd() {
4640        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4641        let e = _mm_setr_epi64x(0, !0);
4642        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4643        assert_eq_m128i(r, e);
4644    }
4645
4646    #[simd_test(enable = "sse2")]
4647    unsafe fn test_mm_comieq_sd() {
4648        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4649        assert!(_mm_comieq_sd(a, b) != 0);
4650
4651        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4652        assert!(_mm_comieq_sd(a, b) == 0);
4653    }
4654
4655    #[simd_test(enable = "sse2")]
4656    unsafe fn test_mm_comilt_sd() {
4657        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4658        assert!(_mm_comilt_sd(a, b) == 0);
4659    }
4660
4661    #[simd_test(enable = "sse2")]
4662    unsafe fn test_mm_comile_sd() {
4663        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4664        assert!(_mm_comile_sd(a, b) != 0);
4665    }
4666
4667    #[simd_test(enable = "sse2")]
4668    unsafe fn test_mm_comigt_sd() {
4669        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4670        assert!(_mm_comigt_sd(a, b) == 0);
4671    }
4672
4673    #[simd_test(enable = "sse2")]
4674    unsafe fn test_mm_comige_sd() {
4675        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4676        assert!(_mm_comige_sd(a, b) != 0);
4677    }
4678
4679    #[simd_test(enable = "sse2")]
4680    unsafe fn test_mm_comineq_sd() {
4681        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4682        assert!(_mm_comineq_sd(a, b) == 0);
4683    }
4684
4685    #[simd_test(enable = "sse2")]
4686    unsafe fn test_mm_ucomieq_sd() {
4687        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4688        assert!(_mm_ucomieq_sd(a, b) != 0);
4689
4690        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4691        assert!(_mm_ucomieq_sd(a, b) == 0);
4692    }
4693
4694    #[simd_test(enable = "sse2")]
4695    unsafe fn test_mm_ucomilt_sd() {
4696        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4697        assert!(_mm_ucomilt_sd(a, b) == 0);
4698    }
4699
4700    #[simd_test(enable = "sse2")]
4701    unsafe fn test_mm_ucomile_sd() {
4702        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4703        assert!(_mm_ucomile_sd(a, b) != 0);
4704    }
4705
4706    #[simd_test(enable = "sse2")]
4707    unsafe fn test_mm_ucomigt_sd() {
4708        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4709        assert!(_mm_ucomigt_sd(a, b) == 0);
4710    }
4711
4712    #[simd_test(enable = "sse2")]
4713    unsafe fn test_mm_ucomige_sd() {
4714        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4715        assert!(_mm_ucomige_sd(a, b) != 0);
4716    }
4717
4718    #[simd_test(enable = "sse2")]
4719    unsafe fn test_mm_ucomineq_sd() {
4720        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4721        assert!(_mm_ucomineq_sd(a, b) == 0);
4722    }
4723
4724    #[simd_test(enable = "sse2")]
4725    unsafe fn test_mm_movemask_pd() {
4726        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4727        assert_eq!(r, 0b01);
4728
4729        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4730        assert_eq!(r, 0b11);
4731    }
4732
    // 16-byte-aligned scratch buffer of four `f64` values shared by the
    // load/store tests below. The alignment lets `data.as_ptr()` be handed to
    // the aligned load/store intrinsics, and lets the unaligned tests derive a
    // misaligned pointer by stepping one element forward.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4737
4738    #[simd_test(enable = "sse2")]
4739    unsafe fn test_mm_load_pd() {
4740        let mem = Memory {
4741            data: [1.0f64, 2.0, 3.0, 4.0],
4742        };
4743        let vals = &mem.data;
4744        let d = vals.as_ptr();
4745
4746        let r = _mm_load_pd(d);
4747        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4748    }
4749
4750    #[simd_test(enable = "sse2")]
4751    unsafe fn test_mm_load_sd() {
4752        let a = 1.;
4753        let expected = _mm_setr_pd(a, 0.);
4754        let r = _mm_load_sd(&a);
4755        assert_eq_m128d(r, expected);
4756    }
4757
4758    #[simd_test(enable = "sse2")]
4759    unsafe fn test_mm_loadh_pd() {
4760        let a = _mm_setr_pd(1., 2.);
4761        let b = 3.;
4762        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4763        let r = _mm_loadh_pd(a, &b);
4764        assert_eq_m128d(r, expected);
4765    }
4766
4767    #[simd_test(enable = "sse2")]
4768    unsafe fn test_mm_loadl_pd() {
4769        let a = _mm_setr_pd(1., 2.);
4770        let b = 3.;
4771        let expected = _mm_setr_pd(3., get_m128d(a, 1));
4772        let r = _mm_loadl_pd(a, &b);
4773        assert_eq_m128d(r, expected);
4774    }
4775
4776    #[simd_test(enable = "sse2")]
4777    #[cfg_attr(miri, ignore)]
4780    unsafe fn test_mm_stream_pd() {
4781        #[repr(align(128))]
4782        struct Memory {
4783            pub data: [f64; 2],
4784        }
4785        let a = _mm_set1_pd(7.0);
4786        let mut mem = Memory { data: [-1.0; 2] };
4787
4788        _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4789        for i in 0..2 {
4790            assert_eq!(mem.data[i], get_m128d(a, i));
4791        }
4792    }
4793
4794    #[simd_test(enable = "sse2")]
4795    unsafe fn test_mm_store_sd() {
4796        let mut dest = 0.;
4797        let a = _mm_setr_pd(1., 2.);
4798        _mm_store_sd(&mut dest, a);
4799        assert_eq!(dest, _mm_cvtsd_f64(a));
4800    }
4801
4802    #[simd_test(enable = "sse2")]
4803    unsafe fn test_mm_store_pd() {
4804        let mut mem = Memory { data: [0.0f64; 4] };
4805        let vals = &mut mem.data;
4806        let a = _mm_setr_pd(1.0, 2.0);
4807        let d = vals.as_mut_ptr();
4808
4809        _mm_store_pd(d, *black_box(&a));
4810        assert_eq!(vals[0], 1.0);
4811        assert_eq!(vals[1], 2.0);
4812    }
4813
4814    #[simd_test(enable = "sse2")]
4815    unsafe fn test_mm_storeu_pd() {
4816        let mut mem = Memory { data: [0.0f64; 4] };
4817        let vals = &mut mem.data;
4818        let a = _mm_setr_pd(1.0, 2.0);
4819
4820        let mut ofs = 0;
4821        let mut p = vals.as_mut_ptr();
4822
4823        if (p as usize) & 0xf == 0 {
4825            ofs = 1;
4826            p = p.add(1);
4827        }
4828
4829        _mm_storeu_pd(p, *black_box(&a));
4830
4831        if ofs > 0 {
4832            assert_eq!(vals[ofs - 1], 0.0);
4833        }
4834        assert_eq!(vals[ofs + 0], 1.0);
4835        assert_eq!(vals[ofs + 1], 2.0);
4836    }
4837
4838    #[simd_test(enable = "sse2")]
4839    unsafe fn test_mm_storeu_si16() {
4840        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4841        let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4842        _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4843        let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4844        assert_eq_m128i(r, e);
4845    }
4846
4847    #[simd_test(enable = "sse2")]
4848    unsafe fn test_mm_storeu_si32() {
4849        let a = _mm_setr_epi32(1, 2, 3, 4);
4850        let mut r = _mm_setr_epi32(5, 6, 7, 8);
4851        _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4852        let e = _mm_setr_epi32(1, 6, 7, 8);
4853        assert_eq_m128i(r, e);
4854    }
4855
4856    #[simd_test(enable = "sse2")]
4857    unsafe fn test_mm_storeu_si64() {
4858        let a = _mm_setr_epi64x(1, 2);
4859        let mut r = _mm_setr_epi64x(3, 4);
4860        _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4861        let e = _mm_setr_epi64x(1, 4);
4862        assert_eq_m128i(r, e);
4863    }
4864
4865    #[simd_test(enable = "sse2")]
4866    unsafe fn test_mm_store1_pd() {
4867        let mut mem = Memory { data: [0.0f64; 4] };
4868        let vals = &mut mem.data;
4869        let a = _mm_setr_pd(1.0, 2.0);
4870        let d = vals.as_mut_ptr();
4871
4872        _mm_store1_pd(d, *black_box(&a));
4873        assert_eq!(vals[0], 1.0);
4874        assert_eq!(vals[1], 1.0);
4875    }
4876
4877    #[simd_test(enable = "sse2")]
4878    unsafe fn test_mm_store_pd1() {
4879        let mut mem = Memory { data: [0.0f64; 4] };
4880        let vals = &mut mem.data;
4881        let a = _mm_setr_pd(1.0, 2.0);
4882        let d = vals.as_mut_ptr();
4883
4884        _mm_store_pd1(d, *black_box(&a));
4885        assert_eq!(vals[0], 1.0);
4886        assert_eq!(vals[1], 1.0);
4887    }
4888
4889    #[simd_test(enable = "sse2")]
4890    unsafe fn test_mm_storer_pd() {
4891        let mut mem = Memory { data: [0.0f64; 4] };
4892        let vals = &mut mem.data;
4893        let a = _mm_setr_pd(1.0, 2.0);
4894        let d = vals.as_mut_ptr();
4895
4896        _mm_storer_pd(d, *black_box(&a));
4897        assert_eq!(vals[0], 2.0);
4898        assert_eq!(vals[1], 1.0);
4899    }
4900
4901    #[simd_test(enable = "sse2")]
4902    unsafe fn test_mm_storeh_pd() {
4903        let mut dest = 0.;
4904        let a = _mm_setr_pd(1., 2.);
4905        _mm_storeh_pd(&mut dest, a);
4906        assert_eq!(dest, get_m128d(a, 1));
4907    }
4908
4909    #[simd_test(enable = "sse2")]
4910    unsafe fn test_mm_storel_pd() {
4911        let mut dest = 0.;
4912        let a = _mm_setr_pd(1., 2.);
4913        _mm_storel_pd(&mut dest, a);
4914        assert_eq!(dest, _mm_cvtsd_f64(a));
4915    }
4916
4917    #[simd_test(enable = "sse2")]
4918    unsafe fn test_mm_loadr_pd() {
4919        let mut mem = Memory {
4920            data: [1.0f64, 2.0, 3.0, 4.0],
4921        };
4922        let vals = &mut mem.data;
4923        let d = vals.as_ptr();
4924
4925        let r = _mm_loadr_pd(d);
4926        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4927    }
4928
4929    #[simd_test(enable = "sse2")]
4930    unsafe fn test_mm_loadu_pd() {
4931        let mut mem = Memory {
4932            data: [1.0f64, 2.0, 3.0, 4.0],
4933        };
4934        let vals = &mut mem.data;
4935        let mut d = vals.as_ptr();
4936
4937        let mut offset = 0;
4939        if (d as usize) & 0xf == 0 {
4940            offset = 1;
4941            d = d.add(offset);
4942        }
4943
4944        let r = _mm_loadu_pd(d);
4945        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4946        assert_eq_m128d(r, e);
4947    }
4948
4949    #[simd_test(enable = "sse2")]
4950    unsafe fn test_mm_loadu_si16() {
4951        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4952        let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4953        assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4954    }
4955
4956    #[simd_test(enable = "sse2")]
4957    unsafe fn test_mm_loadu_si32() {
4958        let a = _mm_setr_epi32(1, 2, 3, 4);
4959        let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4960        assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4961    }
4962
4963    #[simd_test(enable = "sse2")]
4964    unsafe fn test_mm_loadu_si64() {
4965        let a = _mm_setr_epi64x(5, 6);
4966        let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
4967        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4968    }
4969
4970    #[simd_test(enable = "sse2")]
4971    unsafe fn test_mm_cvtpd_ps() {
4972        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4973        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4974
4975        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4976        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
4977
4978        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
4979        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
4980
4981        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
4982        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
4983    }
4984
4985    #[simd_test(enable = "sse2")]
4986    unsafe fn test_mm_cvtps_pd() {
4987        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
4988        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
4989
4990        let r = _mm_cvtps_pd(_mm_setr_ps(
4991            f32::MAX,
4992            f32::INFINITY,
4993            f32::NEG_INFINITY,
4994            f32::MIN,
4995        ));
4996        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
4997    }
4998
4999    #[simd_test(enable = "sse2")]
5000    unsafe fn test_mm_cvtpd_epi32() {
5001        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5002        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5003
5004        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5005        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5006
5007        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5008        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5009
5010        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5011        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5012
5013        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5014        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5015    }
5016
5017    #[simd_test(enable = "sse2")]
5018    unsafe fn test_mm_cvtsd_si32() {
5019        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5020        assert_eq!(r, -2);
5021
5022        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5023        assert_eq!(r, i32::MIN);
5024
5025        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5026        assert_eq!(r, i32::MIN);
5027    }
5028
5029    #[simd_test(enable = "sse2")]
5030    unsafe fn test_mm_cvtsd_ss() {
5031        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5032        let b = _mm_setr_pd(2.0, -5.0);
5033
5034        let r = _mm_cvtsd_ss(a, b);
5035
5036        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5037
5038        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5039        let b = _mm_setr_pd(f64::INFINITY, -5.0);
5040
5041        let r = _mm_cvtsd_ss(a, b);
5042
5043        assert_eq_m128(
5044            r,
5045            _mm_setr_ps(
5046                f32::INFINITY,
5047                f32::NEG_INFINITY,
5048                f32::MAX,
5049                f32::NEG_INFINITY,
5050            ),
5051        );
5052    }
5053
5054    #[simd_test(enable = "sse2")]
5055    unsafe fn test_mm_cvtsd_f64() {
5056        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5057        assert_eq!(r, -1.1);
5058    }
5059
5060    #[simd_test(enable = "sse2")]
5061    unsafe fn test_mm_cvtss_sd() {
5062        let a = _mm_setr_pd(-1.1, 2.2);
5063        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5064
5065        let r = _mm_cvtss_sd(a, b);
5066        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5067
5068        let a = _mm_setr_pd(-1.1, f64::INFINITY);
5069        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5070
5071        let r = _mm_cvtss_sd(a, b);
5072        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5073    }
5074
5075    #[simd_test(enable = "sse2")]
5076    unsafe fn test_mm_cvttpd_epi32() {
5077        let a = _mm_setr_pd(-1.1, 2.2);
5078        let r = _mm_cvttpd_epi32(a);
5079        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5080
5081        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5082        let r = _mm_cvttpd_epi32(a);
5083        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5084    }
5085
5086    #[simd_test(enable = "sse2")]
5087    unsafe fn test_mm_cvttsd_si32() {
5088        let a = _mm_setr_pd(-1.1, 2.2);
5089        let r = _mm_cvttsd_si32(a);
5090        assert_eq!(r, -1);
5091
5092        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5093        let r = _mm_cvttsd_si32(a);
5094        assert_eq!(r, i32::MIN);
5095    }
5096
5097    #[simd_test(enable = "sse2")]
5098    unsafe fn test_mm_cvttps_epi32() {
5099        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5100        let r = _mm_cvttps_epi32(a);
5101        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5102
5103        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5104        let r = _mm_cvttps_epi32(a);
5105        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5106    }
5107
5108    #[simd_test(enable = "sse2")]
5109    unsafe fn test_mm_set_sd() {
5110        let r = _mm_set_sd(-1.0_f64);
5111        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5112    }
5113
5114    #[simd_test(enable = "sse2")]
5115    unsafe fn test_mm_set1_pd() {
5116        let r = _mm_set1_pd(-1.0_f64);
5117        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5118    }
5119
5120    #[simd_test(enable = "sse2")]
5121    unsafe fn test_mm_set_pd1() {
5122        let r = _mm_set_pd1(-2.0_f64);
5123        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5124    }
5125
5126    #[simd_test(enable = "sse2")]
5127    unsafe fn test_mm_set_pd() {
5128        let r = _mm_set_pd(1.0_f64, 5.0_f64);
5129        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5130    }
5131
5132    #[simd_test(enable = "sse2")]
5133    unsafe fn test_mm_setr_pd() {
5134        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5135        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5136    }
5137
5138    #[simd_test(enable = "sse2")]
5139    unsafe fn test_mm_setzero_pd() {
5140        let r = _mm_setzero_pd();
5141        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5142    }
5143
5144    #[simd_test(enable = "sse2")]
5145    unsafe fn test_mm_load1_pd() {
5146        let d = -5.0;
5147        let r = _mm_load1_pd(&d);
5148        assert_eq_m128d(r, _mm_setr_pd(d, d));
5149    }
5150
5151    #[simd_test(enable = "sse2")]
5152    unsafe fn test_mm_load_pd1() {
5153        let d = -5.0;
5154        let r = _mm_load_pd1(&d);
5155        assert_eq_m128d(r, _mm_setr_pd(d, d));
5156    }
5157
5158    #[simd_test(enable = "sse2")]
5159    unsafe fn test_mm_unpackhi_pd() {
5160        let a = _mm_setr_pd(1.0, 2.0);
5161        let b = _mm_setr_pd(3.0, 4.0);
5162        let r = _mm_unpackhi_pd(a, b);
5163        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5164    }
5165
5166    #[simd_test(enable = "sse2")]
5167    unsafe fn test_mm_unpacklo_pd() {
5168        let a = _mm_setr_pd(1.0, 2.0);
5169        let b = _mm_setr_pd(3.0, 4.0);
5170        let r = _mm_unpacklo_pd(a, b);
5171        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5172    }
5173
5174    #[simd_test(enable = "sse2")]
5175    unsafe fn test_mm_shuffle_pd() {
5176        let a = _mm_setr_pd(1., 2.);
5177        let b = _mm_setr_pd(3., 4.);
5178        let expected = _mm_setr_pd(1., 3.);
5179        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5180        assert_eq_m128d(r, expected);
5181    }
5182
5183    #[simd_test(enable = "sse2")]
5184    unsafe fn test_mm_move_sd() {
5185        let a = _mm_setr_pd(1., 2.);
5186        let b = _mm_setr_pd(3., 4.);
5187        let expected = _mm_setr_pd(3., 2.);
5188        let r = _mm_move_sd(a, b);
5189        assert_eq_m128d(r, expected);
5190    }
5191
5192    #[simd_test(enable = "sse2")]
5193    unsafe fn test_mm_castpd_ps() {
5194        let a = _mm_set1_pd(0.);
5195        let expected = _mm_set1_ps(0.);
5196        let r = _mm_castpd_ps(a);
5197        assert_eq_m128(r, expected);
5198    }
5199
5200    #[simd_test(enable = "sse2")]
5201    unsafe fn test_mm_castpd_si128() {
5202        let a = _mm_set1_pd(0.);
5203        let expected = _mm_set1_epi64x(0);
5204        let r = _mm_castpd_si128(a);
5205        assert_eq_m128i(r, expected);
5206    }
5207
5208    #[simd_test(enable = "sse2")]
5209    unsafe fn test_mm_castps_pd() {
5210        let a = _mm_set1_ps(0.);
5211        let expected = _mm_set1_pd(0.);
5212        let r = _mm_castps_pd(a);
5213        assert_eq_m128d(r, expected);
5214    }
5215
5216    #[simd_test(enable = "sse2")]
5217    unsafe fn test_mm_castps_si128() {
5218        let a = _mm_set1_ps(0.);
5219        let expected = _mm_set1_epi32(0);
5220        let r = _mm_castps_si128(a);
5221        assert_eq_m128i(r, expected);
5222    }
5223
5224    #[simd_test(enable = "sse2")]
5225    unsafe fn test_mm_castsi128_pd() {
5226        let a = _mm_set1_epi64x(0);
5227        let expected = _mm_set1_pd(0.);
5228        let r = _mm_castsi128_pd(a);
5229        assert_eq_m128d(r, expected);
5230    }
5231
5232    #[simd_test(enable = "sse2")]
5233    unsafe fn test_mm_castsi128_ps() {
5234        let a = _mm_set1_epi32(0);
5235        let expected = _mm_set1_ps(0.);
5236        let r = _mm_castsi128_ps(a);
5237        assert_eq_m128(r, expected);
5238    }
5239}