1use crate::core_arch::x86::*;
2use crate::intrinsics::simd::simd_select_bitmask;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7#[inline]
15#[target_feature(enable = "avx512ifma")]
16#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17#[cfg_attr(test, assert_instr(vpmadd52huq))]
18pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
19    unsafe { vpmadd52huq_512(a, b, c) }
20}
21
22#[inline]
31#[target_feature(enable = "avx512ifma")]
32#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33#[cfg_attr(test, assert_instr(vpmadd52huq))]
34pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
35    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
36}
37
38#[inline]
47#[target_feature(enable = "avx512ifma")]
48#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
49#[cfg_attr(test, assert_instr(vpmadd52huq))]
50pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
51    unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
52}
53
54#[inline]
62#[target_feature(enable = "avx512ifma")]
63#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
64#[cfg_attr(test, assert_instr(vpmadd52luq))]
65pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
66    unsafe { vpmadd52luq_512(a, b, c) }
67}
68
69#[inline]
78#[target_feature(enable = "avx512ifma")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpmadd52luq))]
81pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
82    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
83}
84
85#[inline]
94#[target_feature(enable = "avx512ifma")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpmadd52luq))]
97pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
98    unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
99}
100
101#[inline]
109#[target_feature(enable = "avxifma")]
110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
111#[cfg_attr(
112    all(test, any(target_os = "linux", target_env = "msvc")),
113    assert_instr(vpmadd52huq)
114)]
115pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
116    unsafe { vpmadd52huq_256(a, b, c) }
117}
118
119#[inline]
127#[target_feature(enable = "avx512ifma,avx512vl")]
128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
129#[cfg_attr(test, assert_instr(vpmadd52huq))]
130pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
131    unsafe { vpmadd52huq_256(a, b, c) }
132}
133
134#[inline]
143#[target_feature(enable = "avx512ifma,avx512vl")]
144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
145#[cfg_attr(test, assert_instr(vpmadd52huq))]
146pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
147    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
148}
149
150#[inline]
159#[target_feature(enable = "avx512ifma,avx512vl")]
160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
161#[cfg_attr(test, assert_instr(vpmadd52huq))]
162pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
163    unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
164}
165
166#[inline]
174#[target_feature(enable = "avxifma")]
175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
176#[cfg_attr(
177    all(test, any(target_os = "linux", target_env = "msvc")),
178    assert_instr(vpmadd52luq)
179)]
180pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
181    unsafe { vpmadd52luq_256(a, b, c) }
182}
183
184#[inline]
192#[target_feature(enable = "avx512ifma,avx512vl")]
193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
194#[cfg_attr(test, assert_instr(vpmadd52luq))]
195pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
196    unsafe { vpmadd52luq_256(a, b, c) }
197}
198
199#[inline]
208#[target_feature(enable = "avx512ifma,avx512vl")]
209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
210#[cfg_attr(test, assert_instr(vpmadd52luq))]
211pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
212    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
213}
214
215#[inline]
224#[target_feature(enable = "avx512ifma,avx512vl")]
225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
226#[cfg_attr(test, assert_instr(vpmadd52luq))]
227pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
228    unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
229}
230
231#[inline]
239#[target_feature(enable = "avxifma")]
240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
241#[cfg_attr(
242    all(test, any(target_os = "linux", target_env = "msvc")),
243    assert_instr(vpmadd52huq)
244)]
245pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
246    unsafe { vpmadd52huq_128(a, b, c) }
247}
248
249#[inline]
257#[target_feature(enable = "avx512ifma,avx512vl")]
258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
259#[cfg_attr(test, assert_instr(vpmadd52huq))]
260pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
261    unsafe { vpmadd52huq_128(a, b, c) }
262}
263
264#[inline]
273#[target_feature(enable = "avx512ifma,avx512vl")]
274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
275#[cfg_attr(test, assert_instr(vpmadd52huq))]
276pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
277    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
278}
279
280#[inline]
289#[target_feature(enable = "avx512ifma,avx512vl")]
290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
291#[cfg_attr(test, assert_instr(vpmadd52huq))]
292pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
293    unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
294}
295
296#[inline]
304#[target_feature(enable = "avxifma")]
305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
306#[cfg_attr(
307    all(test, any(target_os = "linux", target_env = "msvc")),
308    assert_instr(vpmadd52luq)
309)]
310pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
311    unsafe { vpmadd52luq_128(a, b, c) }
312}
313
314#[inline]
322#[target_feature(enable = "avx512ifma,avx512vl")]
323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
324#[cfg_attr(test, assert_instr(vpmadd52luq))]
325pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
326    unsafe { vpmadd52luq_128(a, b, c) }
327}
328
329#[inline]
338#[target_feature(enable = "avx512ifma,avx512vl")]
339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
340#[cfg_attr(test, assert_instr(vpmadd52luq))]
341pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
342    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
343}
344
345#[inline]
354#[target_feature(enable = "avx512ifma,avx512vl")]
355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
356#[cfg_attr(test, assert_instr(vpmadd52luq))]
357pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
358    unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
359}
360
361#[allow(improper_ctypes)]
362unsafe extern "C" {
363    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
364    fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
365    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
366    fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
367    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
368    fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
369    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
370    fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
371    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
372    fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
373    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
374    fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
375}
376
// Unit tests for the 52-bit multiply-add intrinsics.
//
// Every test broadcasts the same three operands:
//   a = 10 << 40
//   b = (11 << 40) + 4
//   c = (12 << 40) + 3
// so b * c = (132 << 80) + (81 << 40) + 12, which gives two expected values:
//   hi: a + ((b * c) >> 52)       = (10 << 40) + (132 << 28)     = 11030549757952
//   lo: a + ((b * c) % (1 << 52)) = (10 << 40) + (81 << 40) + 12 = 100055558127628
// Masked variants blend that value with `a` (merge-masking) or with zero
// (zero-masking) according to `K`.
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Mixed set/clear bits so both the selected and the fallback lanes are checked.
    const K: __mmask8 = 0b01101101;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        // Lanes masked off by K keep the value of `a`.
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        // Lanes masked off by K are zeroed.
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        // Lanes masked off by K keep the value of `a`.
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        // Lanes masked off by K are zeroed.
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        // Lanes masked off by K keep the value of `a`.
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        // Lanes masked off by K are zeroed.
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        // Lanes masked off by K keep the value of `a`.
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        // Lanes masked off by K are zeroed.
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        // Lanes masked off by K keep the value of `a`.
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        // Lanes masked off by K are zeroed.
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        // Lanes masked off by K keep the value of `a`.
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        // Lanes masked off by K are zeroed.
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }
}