package core:simd/x86

⌘K
Ctrl+K
or
/

    Overview

    SIMD intrinsics specific to the Intel x86 and AMD64 architectures.

    Index

    Constants (89)
    Variables (0)

    This section is empty.

    Procedures (674)
    Procedure Groups (0)

    This section is empty.

    Types

    _MM_CMPINT_ENUM ¶

    _MM_CMPINT_ENUM :: i32
     

    The _MM_CMPINT_ENUM type used to specify comparison operations in AVX-512 intrinsics.

    _MM_MANTISSA_NORM_ENUM ¶

    _MM_MANTISSA_NORM_ENUM :: i32
     

    The _MM_MANTISSA_NORM_ENUM type used to specify mantissa normalized operations in AVX-512 intrinsics.

    _MM_MANTISSA_SIGN_ENUM ¶

    _MM_MANTISSA_SIGN_ENUM :: i32
     

    The _MM_MANTISSA_SIGN_ENUM type used to specify mantissa signed operations in AVX-512 intrinsics.

    _MM_PERM_ENUM ¶

    _MM_PERM_ENUM :: i32

    __m128 ¶

    __m128 :: #simd[4]f32

    __m128bh ¶

    __m128bh :: #simd[8]u16

    __m128d ¶

    __m128d :: #simd[2]f64

    __m128i ¶

    __m128i :: #simd[2]i64

    __m256 ¶

    __m256 :: #simd[8]f32

    __m256bh ¶

    __m256bh :: #simd[16]u16

    __m256d ¶

    __m256d :: #simd[4]f64

    __m256i ¶

    __m256i :: #simd[4]i64

    __m512 ¶

    __m512 :: #simd[16]f32

    __m512bh ¶

    __m512bh :: #simd[32]u16

    __m512d ¶

    __m512d :: #simd[8]f64

    __m512i ¶

    __m512i :: #simd[8]i64

    __mmask16 ¶

    __mmask16 :: u16
     

    The __mmask16 type used in AVX-512 intrinsics, a 16-bit integer

    __mmask32 ¶

    __mmask32 :: u32
     

    The __mmask32 type used in AVX-512 intrinsics, a 32-bit integer

    __mmask64 ¶

    __mmask64 :: u64
     

    The __mmask64 type used in AVX-512 intrinsics, a 64-bit integer

    __mmask8 ¶

    __mmask8 :: u8
     

    The __mmask8 type used in AVX-512 intrinsics, an 8-bit integer

    Constants

    _CMP_EQ_OQ ¶

    _CMP_EQ_OQ :: 0x00
     

    Equal (ordered, non-signaling)

    _CMP_EQ_OS ¶

    _CMP_EQ_OS :: 0x10
     

    Equal (ordered, signaling)

    _CMP_EQ_UQ ¶

    _CMP_EQ_UQ :: 0x08
     

    Equal (unordered, non-signaling)

    _CMP_EQ_US ¶

    _CMP_EQ_US :: 0x18
     

    Equal (unordered, signaling)

    _CMP_FALSE_OQ ¶

    _CMP_FALSE_OQ :: 0x0b
     

    False (ordered, non-signaling)

    _CMP_FALSE_OS ¶

    _CMP_FALSE_OS :: 0x1b
     

    False (ordered, signaling)

    _CMP_GE_OQ ¶

    _CMP_GE_OQ :: 0x1d
     

    Greater-than-or-equal (ordered, non-signaling)

    _CMP_GE_OS ¶

    _CMP_GE_OS :: 0x0d
     

    Greater-than-or-equal (ordered, signaling)

    _CMP_GT_OQ ¶

    _CMP_GT_OQ :: 0x1e
     

    Greater-than (ordered, non-signaling)

    _CMP_GT_OS ¶

    _CMP_GT_OS :: 0x0e
     

    Greater-than (ordered, signaling)

    _CMP_LE_OQ ¶

    _CMP_LE_OQ :: 0x12
     

    Less-than-or-equal (ordered, non-signaling)

    _CMP_LE_OS ¶

    _CMP_LE_OS :: 0x02
     

    Less-than-or-equal (ordered, signaling)

    _CMP_LT_OQ ¶

    _CMP_LT_OQ :: 0x11
     

    Less-than (ordered, non-signaling)

    _CMP_LT_OS ¶

    _CMP_LT_OS :: 0x01
     

    Less-than (ordered, signaling)

    _CMP_NEQ_OQ ¶

    _CMP_NEQ_OQ :: 0x0c
     

    Not-equal (ordered, non-signaling)

    _CMP_NEQ_OS ¶

    _CMP_NEQ_OS :: 0x1c
     

    Not-equal (ordered, signaling)

    _CMP_NEQ_UQ ¶

    _CMP_NEQ_UQ :: 0x04
     

    Not-equal (unordered, non-signaling)

    _CMP_NEQ_US ¶

    _CMP_NEQ_US :: 0x14
     

    Not-equal (unordered, signaling)

    _CMP_NGE_UQ ¶

    _CMP_NGE_UQ :: 0x19
     

    Not-greater-than-or-equal (unordered, non-signaling)

    _CMP_NGE_US ¶

    _CMP_NGE_US :: 0x09
     

    Not-greater-than-or-equal (unordered, signaling)

    _CMP_NGT_UQ ¶

    _CMP_NGT_UQ :: 0x1a
     

    Not-greater-than (unordered, non-signaling)

    _CMP_NGT_US ¶

    _CMP_NGT_US :: 0x0a
     

    Not-greater-than (unordered, signaling)

    _CMP_NLE_UQ ¶

    _CMP_NLE_UQ :: 0x16
     

    Not-less-than-or-equal (unordered, non-signaling)

    _CMP_NLE_US ¶

    _CMP_NLE_US :: 0x06
     

    Not-less-than-or-equal (unordered, signaling)

    _CMP_NLT_UQ ¶

    _CMP_NLT_UQ :: 0x15
     

    Not-less-than (unordered, non-signaling)

    _CMP_NLT_US ¶

    _CMP_NLT_US :: 0x05
     

    Not-less-than (unordered, signaling)

    _CMP_ORD_Q ¶

    _CMP_ORD_Q :: 0x07
     

    Ordered (non-signaling)

    _CMP_ORD_S ¶

    _CMP_ORD_S :: 0x17
     

    Ordered (signaling)

    _CMP_TRUE_UQ ¶

    _CMP_TRUE_UQ :: 0x0f
     

    True (unordered, non-signaling)

    _CMP_TRUE_US ¶

    _CMP_TRUE_US :: 0x1f
     

    True (unordered, signaling)

    _CMP_UNORD_Q ¶

    _CMP_UNORD_Q :: 0x03
     

    Unordered (non-signaling)

    _CMP_UNORD_S ¶

    _CMP_UNORD_S :: 0x13
     

    Unordered (signaling)

    _MM_EXCEPT_DENORM ¶

    _MM_EXCEPT_DENORM :: 0x0002

    _MM_EXCEPT_DIV_ZERO ¶

    _MM_EXCEPT_DIV_ZERO :: 0x0004

    _MM_EXCEPT_INEXACT ¶

    _MM_EXCEPT_INEXACT :: 0x0020

    _MM_EXCEPT_INVALID ¶

    _MM_EXCEPT_INVALID :: 0x0001

    _MM_EXCEPT_MASK ¶

    _MM_EXCEPT_MASK :: 0x003f

    _MM_EXCEPT_OVERFLOW ¶

    _MM_EXCEPT_OVERFLOW :: 0x0008

    _MM_EXCEPT_UNDERFLOW ¶

    _MM_EXCEPT_UNDERFLOW :: 0x0010

    _MM_FLUSH_ZERO_MASK ¶

    _MM_FLUSH_ZERO_MASK :: 0x8000

    _MM_FLUSH_ZERO_OFF ¶

    _MM_FLUSH_ZERO_OFF :: 0x0000

    _MM_FLUSH_ZERO_ON ¶

    _MM_FLUSH_ZERO_ON :: 0x8000

    _MM_FROUND_CEIL ¶

    _MM_FROUND_CEIL :: _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF

    _MM_FROUND_CUR_DIRECTION ¶

    _MM_FROUND_CUR_DIRECTION :: 0x04

    _MM_FROUND_FLOOR ¶

    _MM_FROUND_FLOOR :: _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF

    _MM_FROUND_NEARBYINT ¶

    _MM_FROUND_NEARBYINT :: _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION

    _MM_FROUND_NINT ¶

    _MM_FROUND_NINT :: 0x00

    _MM_FROUND_NO_EXC ¶

    _MM_FROUND_NO_EXC :: 0x08

    _MM_FROUND_RAISE_EXC ¶

    _MM_FROUND_RAISE_EXC :: 0x00

    _MM_FROUND_RINT ¶

    _MM_FROUND_RINT :: _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION

    _MM_FROUND_TO_NEAREST_INT ¶

    _MM_FROUND_TO_NEAREST_INT :: 0x00
     

    SSE4 rounding constants

    _MM_FROUND_TO_NEG_INF ¶

    _MM_FROUND_TO_NEG_INF :: 0x01

    _MM_FROUND_TO_POS_INF ¶

    _MM_FROUND_TO_POS_INF :: 0x02

    _MM_FROUND_TO_ZERO ¶

    _MM_FROUND_TO_ZERO :: 0x03

    _MM_FROUND_TRUNC ¶

    _MM_FROUND_TRUNC :: _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO

    _MM_HINT_ET0 ¶

    _MM_HINT_ET0 :: 7

    _MM_HINT_ET1 ¶

    _MM_HINT_ET1 :: 6

    _MM_HINT_NTA ¶

    _MM_HINT_NTA :: 0

    _MM_HINT_T0 ¶

    _MM_HINT_T0 :: 3

    _MM_HINT_T1 ¶

    _MM_HINT_T1 :: 2

    _MM_HINT_T2 ¶

    _MM_HINT_T2 :: 1

    _MM_MASK_DENORM ¶

    _MM_MASK_DENORM :: 0x0100

    _MM_MASK_DIV_ZERO ¶

    _MM_MASK_DIV_ZERO :: 0x0200

    _MM_MASK_INEXACT ¶

    _MM_MASK_INEXACT :: 0x1000

    _MM_MASK_INVALID ¶

    _MM_MASK_INVALID :: 0x0080

    _MM_MASK_MASK ¶

    _MM_MASK_MASK :: 0x1f80

    _MM_MASK_OVERFLOW ¶

    _MM_MASK_OVERFLOW :: 0x0400

    _MM_MASK_UNDERFLOW ¶

    _MM_MASK_UNDERFLOW :: 0x0800

    _MM_ROUND_DOWN ¶

    _MM_ROUND_DOWN :: 0x2000

    _MM_ROUND_MASK ¶

    _MM_ROUND_MASK :: 0x6000

    _MM_ROUND_NEAREST ¶

    _MM_ROUND_NEAREST :: 0x0000

    _MM_ROUND_TOWARD_ZERO ¶

    _MM_ROUND_TOWARD_ZERO :: 0x6000

    _MM_ROUND_UP ¶

    _MM_ROUND_UP :: 0x4000

    _SIDD_BIT_MASK ¶

    _SIDD_BIT_MASK :: 0b0000_0000

    _SIDD_CMP_EQUAL_ANY ¶

    _SIDD_CMP_EQUAL_ANY :: 0b0000_0000

    _SIDD_CMP_EQUAL_EACH ¶

    _SIDD_CMP_EQUAL_EACH :: 0b0000_1000

    _SIDD_CMP_EQUAL_ORDERED ¶

    _SIDD_CMP_EQUAL_ORDERED :: 0b0000_1100

    _SIDD_CMP_RANGES ¶

    _SIDD_CMP_RANGES :: 0b0000_0100

    _SIDD_LEAST_SIGNIFICANT ¶

    _SIDD_LEAST_SIGNIFICANT :: 0b0000_0000

    _SIDD_MASKED_NEGATIVE_POLARITY ¶

    _SIDD_MASKED_NEGATIVE_POLARITY :: 0b0011_0000

    _SIDD_MASKED_POSITIVE_POLARITY ¶

    _SIDD_MASKED_POSITIVE_POLARITY :: 0b0010_0000

    _SIDD_MOST_SIGNIFICANT ¶

    _SIDD_MOST_SIGNIFICANT :: 0b0100_0000

    _SIDD_NEGATIVE_POLARITY ¶

    _SIDD_NEGATIVE_POLARITY :: 0b0001_0000

    _SIDD_POSITIVE_POLARITY ¶

    _SIDD_POSITIVE_POLARITY :: 0b0000_0000

    _SIDD_SBYTE_OPS ¶

    _SIDD_SBYTE_OPS :: 0b0000_0010

    _SIDD_SWORD_OPS ¶

    _SIDD_SWORD_OPS :: 0b0000_0011

    _SIDD_UBYTE_OPS ¶

    _SIDD_UBYTE_OPS :: 0b0000_0000

    _SIDD_UNIT_MASK ¶

    _SIDD_UNIT_MASK :: 0b0100_0000

    _SIDD_UWORD_OPS ¶

    _SIDD_UWORD_OPS :: 0b0000_0001

    Variables

    This section is empty.

    Procedures

    _MM_GET_EXCEPTION_MASK ¶

    _MM_GET_EXCEPTION_MASK :: proc "c" () -> u32 {…}

    _MM_GET_EXCEPTION_STATE ¶

    _MM_GET_EXCEPTION_STATE :: proc "c" () -> u32 {…}

    _MM_GET_FLUSH_ZERO_MODE ¶

    _MM_GET_FLUSH_ZERO_MODE :: proc "c" () -> u32 {…}

    _MM_GET_ROUNDING_MODE ¶

    _MM_GET_ROUNDING_MODE :: proc "c" () -> u32 {…}

    _MM_SET_EXCEPTION_MASK ¶

    _MM_SET_EXCEPTION_MASK :: proc "c" (x: u32) {…}

    _MM_SET_EXCEPTION_STATE ¶

    _MM_SET_EXCEPTION_STATE :: proc "c" (x: u32) {…}

    _MM_SET_FLUSH_ZERO_MODE ¶

    _MM_SET_FLUSH_ZERO_MODE :: proc "c" (x: u32) {…}

    _MM_SET_ROUNDING_MODE ¶

    _MM_SET_ROUNDING_MODE :: proc "c" (x: u32) {…}

    _MM_SHUFFLE ¶

     

    _MM_SHUFFLE(z, y, x, w) -> (z<<6 | y<<4 | x<<2 | w)

    _MM_TRANSPOSE4_PS ¶

    _MM_TRANSPOSE4_PS :: proc "c" (row0, row1, row2, row3: ^#simd[4]f32) {…}

    __rdtscp ¶

    __rdtscp :: proc "c" (aux: ^u32) -> u64 {…}

    _addcarry_u32 ¶

    _addcarry_u32 :: proc "c" (c_in: u8, a: u32, b: u32, out: ^u32) -> u8 {…}

    _addcarry_u64 ¶

    _addcarry_u64 :: proc "c" (c_in: u8, a: u64, b: u64, out: ^u64) -> u8 {…}

    _addcarryx_u32 ¶

    _addcarryx_u32 :: proc "c" (c_in: u8, a: u32, b: u32, out: ^u32) -> u8 {…}

    _addcarryx_u64 ¶

    _addcarryx_u64 :: proc "c" (c_in: u8, a: u64, b: u64, out: ^u64) -> u8 {…}

    _andn_u32 ¶

    _andn_u32 :: proc "c" (a, b: u32) -> u32 {…}

    _andn_u64 ¶

    _andn_u64 :: proc "c" (a, b: u64) -> u64 {…}

    _bextr2_u32 ¶

    _bextr2_u32 :: proc "c" (a, control: u32) -> u32 {…}

    _bextr2_u64 ¶

    _bextr2_u64 :: proc "c" (a, control: u64) -> u64 {…}

    _bextr_u32 ¶

    _bextr_u32 :: proc "c" (a, start, len: u32) -> u32 {…}

    _bextr_u64 ¶

    _bextr_u64 :: proc "c" (a: u64, start, len: u32) -> u64 {…}

    _blsi_u32 ¶

    _blsi_u32 :: proc "c" (a: u32) -> u32 {…}

    _blsi_u64 ¶

    _blsi_u64 :: proc "c" (a: u64) -> u64 {…}

    _blsmsk_u32 ¶

    _blsmsk_u32 :: proc "c" (a: u32) -> u32 {…}

    _blsmsk_u64 ¶

    _blsmsk_u64 :: proc "c" (a: u64) -> u64 {…}

    _blsr_u32 ¶

    _blsr_u32 :: proc "c" (a: u32) -> u32 {…}

    _blsr_u64 ¶

    _blsr_u64 :: proc "c" (a: u64) -> u64 {…}

    _bzhi_u32 ¶

    _bzhi_u32 :: proc "c" (a, index: u32) -> u32 {…}

    _bzhi_u64 ¶

    _bzhi_u64 :: proc "c" (a, index: u64) -> u64 {…}

    _fxrstor ¶

    _fxrstor :: proc "c" (mem_addr: rawptr) {…}

    _fxrstor64 ¶

    _fxrstor64 :: proc "c" (mem_addr: rawptr) {…}

    _fxsave ¶

    _fxsave :: proc "c" (mem_addr: rawptr) {…}

    _fxsave64 ¶

    _fxsave64 :: proc "c" (mem_addr: rawptr) {…}

    _lzcnt_u32 ¶

    _lzcnt_u32 :: proc "c" (x: u32) -> u32 {…}

    _lzcnt_u64 ¶

    _lzcnt_u64 :: proc "c" (x: u64) -> u64 {…}

    _mm256_add_pd ¶

    _mm256_add_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Adds packed double-precision (64-bit) floating-point elements in a and b.

    _mm256_add_ps ¶

    _mm256_add_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Adds packed single-precision (32-bit) floating-point elements in a and b.

    _mm256_addsub_pd ¶

    _mm256_addsub_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Alternately adds and subtracts packed double-precision (64-bit) floating-point elements in a to/from packed elements in b.

    _mm256_addsub_ps ¶

    _mm256_addsub_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Alternately adds and subtracts packed single-precision (32-bit) floating-point elements in a to/from packed elements in b.

    _mm256_and_pd ¶

    _mm256_and_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Computes the bitwise AND of packed double-precision (64-bit) floating-point elements in a and b.

    _mm256_and_ps ¶

    _mm256_and_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Computes the bitwise AND of packed single-precision (32-bit) floating-point elements in a and b.

    _mm256_andnot_pd ¶

    _mm256_andnot_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Computes the bitwise NOT of packed double-precision (64-bit) floating-point elements in a, and then AND with b.

    _mm256_andnot_ps ¶

    _mm256_andnot_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Computes the bitwise NOT of packed single-precision (32-bit) floating-point elements in a and then AND with b.

    _mm256_blend_pd ¶

    _mm256_blend_pd :: proc "c" (a, b: #simd[4]f64, $IIM4: u32) -> #simd[4]f64 {…}
     

    Blends packed double-precision (64-bit) floating-point elements from a and b using the compile-time control mask IIM4 (called imm8 in Intel's documentation).

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_pd)

    _mm256_blend_ps ¶

    _mm256_blend_ps :: proc "c" (a, b: #simd[8]f32, $IMM8: u8) -> #simd[8]f32 {…}
     

    Blends packed single-precision (32-bit) floating-point elements from a and b using control mask imm8.

    _mm256_blendv_pd ¶

    _mm256_blendv_pd :: proc "c" (a, b: #simd[4]f64, c: #simd[4]f64) -> #simd[4]f64 {…}
     

    Blends packed double-precision (64-bit) floating-point elements from a and b using c as a mask.

    _mm256_blendv_ps ¶

    _mm256_blendv_ps :: proc "c" (a, b: #simd[8]f32, c: #simd[8]f32) -> #simd[8]f32 {…}
     

    Blends packed single-precision (32-bit) floating-point elements from a and b using c as a mask.

    _mm256_broadcast_pd ¶

    _mm256_broadcast_pd :: proc "c" (a: ^#simd[2]f64) -> #simd[4]f64 {…}
     

    Broadcasts 128 bits from memory (composed of 2 packed double-precision (64-bit) floating-point elements) to all elements of the returned vector.

    _mm256_broadcast_ps ¶

    _mm256_broadcast_ps :: proc "c" (a: ^#simd[4]f32) -> #simd[8]f32 {…}
     

    Broadcasts 128 bits from memory (composed of 4 packed single-precision (32-bit) floating-point elements) to all elements of the returned vector.

    _mm256_broadcast_sd ¶

    _mm256_broadcast_sd :: proc "c" (f: ^f64) -> #simd[4]f64 {…}
     

    Broadcasts a double-precision (64-bit) floating-point element from memory to all elements of the returned vector.

    _mm256_broadcast_ss ¶

    _mm256_broadcast_ss :: proc "c" (f: ^f32) -> #simd[8]f32 {…}
     

    Broadcasts a single-precision (32-bit) floating-point element from memory to all elements of the returned vector.

    _mm256_castpd128_pd256 ¶

    _mm256_castpd128_pd256 :: proc "c" (a: #simd[2]f64) -> #simd[4]f64 {…}
     

    Casts vector of type __m128d to type __m256d; the upper 128 bits of the result are indeterminate.

    In the Intel documentation, the upper bits are declared to be "undefined".

    _mm256_castpd256_pd128 ¶

    _mm256_castpd256_pd128 :: proc "c" (a: #simd[4]f64) -> #simd[2]f64 {…}
     

    Casts vector of type __m256d to type __m128d.

    _mm256_castpd_ps ¶

    _mm256_castpd_ps :: proc "c" (a: #simd[4]f64) -> #simd[8]f32 {…}
     

    Casts vector of type __m256d to type __m256.

    _mm256_castpd_si256 ¶

    _mm256_castpd_si256 :: proc "c" (a: #simd[4]f64) -> #simd[4]i64 {…}
     

    Casts vector of type __m256d to type __m256i.

    _mm256_castps128_ps256 ¶

    _mm256_castps128_ps256 :: proc "c" (a: #simd[4]f32) -> #simd[8]f32 {…}
     

    Casts vector of type __m128 to type __m256; the upper 128 bits of the result are indeterminate.

    In the Intel documentation, the upper bits are declared to be "undefined".

    _mm256_castps256_ps128 ¶

    _mm256_castps256_ps128 :: proc "c" (a: #simd[8]f32) -> #simd[4]f32 {…}
     

    Casts vector of type __m256 to type __m128.

    _mm256_castps_pd ¶

    _mm256_castps_pd :: proc "c" (a: #simd[8]f32) -> #simd[4]f64 {…}
     

    Casts vector of type __m256 to type __m256d.

    _mm256_castps_si256 ¶

    _mm256_castps_si256 :: proc "c" (a: #simd[8]f32) -> #simd[4]i64 {…}
     

    Casts vector of type __m256 to type __m256i.

    _mm256_castsi128_si256 ¶

    _mm256_castsi128_si256 :: proc "c" (a: #simd[2]i64) -> #simd[4]i64 {…}
     

    Casts vector of type __m128i to type __m256i; the upper 128 bits of the result are indeterminate.

    In the Intel documentation, the upper bits are declared to be "undefined".

    _mm256_castsi256_pd ¶

    _mm256_castsi256_pd :: proc "c" (a: #simd[4]i64) -> #simd[4]f64 {…}
     

    Casts vector of type __m256i to type __m256d.

    _mm256_castsi256_ps ¶

    _mm256_castsi256_ps :: proc "c" (a: #simd[4]i64) -> #simd[8]f32 {…}
     

    Casts vector of type __m256i to type __m256.

    _mm256_castsi256_si128 ¶

    _mm256_castsi256_si128 :: proc "c" (a: #simd[4]i64) -> #simd[2]i64 {…}
     

    Casts vector of type __m256i to type __m128i.

    _mm256_ceil_pd ¶

    _mm256_ceil_pd :: proc "c" (a: #simd[4]f64) -> #simd[4]f64 {…}
     

    Rounds packed double-precision (64-bit) floating point elements in a toward positive infinity.

    _mm256_ceil_ps ¶

    _mm256_ceil_ps :: proc "c" (a: #simd[8]f32) -> #simd[8]f32 {…}
     

    Rounds packed single-precision (32-bit) floating point elements in a toward positive infinity.

    _mm256_cmp_pd ¶

    _mm256_cmp_pd :: proc "c" (a, b: #simd[4]f64, $IMM5: u8) -> #simd[4]f64 {…}
     

    Compares packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by IMM5.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd)

    _mm256_cmp_ps ¶

    _mm256_cmp_ps :: proc "c" (a, b: #simd[8]f32, $IMM5: u8) -> #simd[8]f32 {…}
     

    Compares packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by IMM5.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps)

    _mm256_cvtepi32_pd ¶

    _mm256_cvtepi32_pd :: proc "c" (a: #simd[2]i64) -> #simd[4]f64 {…}
     

    Converts packed 32-bit integers in a to packed double-precision (64-bit) floating-point elements.

    _mm256_cvtepi32_ps ¶

    _mm256_cvtepi32_ps :: proc "c" (a: #simd[4]i64) -> #simd[8]f32 {…}
     

    Converts packed 32-bit integers in a to packed single-precision (32-bit) floating-point elements.

    _mm256_cvtpd_epi32 ¶

    _mm256_cvtpd_epi32 :: proc "c" (a: #simd[4]f64) -> #simd[2]i64 {…}
     

    Converts packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi32)

    _mm256_cvtpd_ps ¶

    _mm256_cvtpd_ps :: proc "c" (a: #simd[4]f64) -> #simd[4]f32 {…}
     

    Converts packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements.

    _mm256_cvtps_epi32 ¶

    _mm256_cvtps_epi32 :: proc "c" (a: #simd[8]f32) -> #simd[4]i64 {…}
     

    Converts packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi32)

    _mm256_cvtps_pd ¶

    _mm256_cvtps_pd :: proc "c" (a: #simd[4]f32) -> #simd[4]f64 {…}
     

    Converts packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements.

    _mm256_cvtsd_f64 ¶

    _mm256_cvtsd_f64 :: proc "c" (a: #simd[4]f64) -> f64 {…}
     

    Returns the first element of the input vector of [4 x double].

    _mm256_cvtsi256_si32 ¶

    _mm256_cvtsi256_si32 :: proc "c" (a: #simd[4]i64) -> i32 {…}

    _mm256_cvtss_f32 ¶

    _mm256_cvtss_f32 :: proc "c" (a: #simd[8]f32) -> f32 {…}
     

    Returns the first element of the input vector of [8 x float].

    _mm256_cvttpd_epi32 ¶

    _mm256_cvttpd_epi32 :: proc "c" (a: #simd[4]f64) -> #simd[2]i64 {…}
     

    Converts packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi32)

    _mm256_cvttps_epi32 ¶

    _mm256_cvttps_epi32 :: proc "c" (a: #simd[8]f32) -> #simd[4]i64 {…}
     

    Converts packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi32)

    _mm256_div_pd ¶

    _mm256_div_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Computes the division of each of the 4 packed 64-bit floating-point elements in a by the corresponding packed elements in b.

    _mm256_div_ps ¶

    _mm256_div_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Computes the division of each of the 8 packed 32-bit floating-point elements in a by the corresponding packed elements in b.

    _mm256_dp_ps ¶

    _mm256_dp_ps :: proc "c" (a, b: #simd[8]f32, $IMM8: u8) -> #simd[8]f32 {…}
     

    Conditionally multiplies the packed single-precision (32-bit) floating-point elements in a and b using the high 4 bits in imm8, sums the four products, and conditionally returns the sum using the low 4 bits of imm8.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dp_ps)

    _mm256_extract_epi32 ¶

    _mm256_extract_epi32 :: proc "c" (a: #simd[4]i64, $INDEX: u8) -> i32 {…}
     

    Extracts a 32-bit integer from a, selected with INDEX.

    _mm256_extract_epi64 ¶

    _mm256_extract_epi64 :: proc "c" (a: #simd[4]i64, $idx: u32) -> i64 {…}

    _mm256_extractf128_pd ¶

    _mm256_extractf128_pd :: proc "c" (a: #simd[4]f64, $IMM1: u8) -> #simd[2]f64 {…}
     

    Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, selected with imm8.

    _mm256_extractf128_ps ¶

    _mm256_extractf128_ps :: proc "c" (a: #simd[8]f32, $IMM1: u8) -> #simd[4]f32 {…}
     

    Extracts 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8.

    _mm256_extractf128_si256 ¶

    _mm256_extractf128_si256 :: proc "c" (a: #simd[4]i64, $IMM1: u8) -> #simd[2]i64 {…}
     

    Extracts 128 bits (composed of integer data) from a, selected with imm8.

    _mm256_floor_pd ¶

    _mm256_floor_pd :: proc "c" (a: #simd[4]f64) -> #simd[4]f64 {…}
     

    Rounds packed double-precision (64-bit) floating point elements in a toward negative infinity.

    _mm256_floor_ps ¶

    _mm256_floor_ps :: proc "c" (a: #simd[8]f32) -> #simd[8]f32 {…}
     

    Rounds packed single-precision (32-bit) floating point elements in a toward negative infinity.

    _mm256_hadd_pd ¶

    _mm256_hadd_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Horizontal addition of adjacent pairs in the two packed vectors of 4 64-bit floating points a and b. In the result, sums of elements from a are returned in even locations, while sums of elements from b are returned in odd locations.

    _mm256_hadd_ps ¶

    _mm256_hadd_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Horizontal addition of adjacent pairs in the two packed vectors of 8 32-bit floating points a and b. In the result, sums of elements from a are returned in locations of indices 0, 1, 4, 5; while sums of elements from b are returned in locations 2, 3, 6, 7.

    _mm256_hsub_pd ¶

    _mm256_hsub_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Horizontal subtraction of adjacent pairs in the two packed vectors of 4 64-bit floating points a and b. In the result, differences of elements from a are returned in even locations, while differences of elements from b are returned in odd locations.

    _mm256_hsub_ps ¶

    _mm256_hsub_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Horizontal subtraction of adjacent pairs in the two packed vectors of 8 32-bit floating points a and b. In the result, differences of elements from a are returned in locations of indices 0, 1, 4, 5; while differences of elements from b are returned in locations 2, 3, 6, 7.

    _mm256_insert_epi16 ¶

    _mm256_insert_epi16 :: proc "c" (a: #simd[4]i64, i: i16, $INDEX: u8) -> #simd[4]i64 {…}
     

    Copies a to result, and inserts the 16-bit integer i into result at the location specified by index.

    _mm256_insert_epi32 ¶

    _mm256_insert_epi32 :: proc "c" (a: #simd[4]i64, i: i32, $INDEX: u8) -> #simd[4]i64 {…}
     

    Copies a to result, and inserts the 32-bit integer i into result at the location specified by index.

    _mm256_insert_epi64 ¶

    _mm256_insert_epi64 :: proc "c" (a: #simd[4]i64, i: i64, $idx: u32) -> #simd[4]i64 {…}

    _mm256_insert_epi8 ¶

    _mm256_insert_epi8 :: proc "c" (a: #simd[4]i64, i: i8, $INDEX: u8) -> #simd[4]i64 {…}
     

    Copies a to result, and inserts the 8-bit integer i into result at the location specified by index.

    _mm256_insertf128_pd ¶

    _mm256_insertf128_pd :: proc "c" (a: #simd[4]f64, b: #simd[2]f64, $IMM1: u8) -> #simd[4]f64 {…}
     

    Copies a to result, then inserts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from b into result at the location specified by imm8.

    _mm256_insertf128_ps ¶

    _mm256_insertf128_ps :: proc "c" (a: #simd[8]f32, b: #simd[4]f32, $IMM1: u8) -> #simd[8]f32 {…}
     

    Copies a to result, then inserts 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into result at the location specified by imm8.

    _mm256_insertf128_si256 ¶

    _mm256_insertf128_si256 :: proc "c" (a: #simd[4]i64, b: #simd[2]i64, $IMM1: u8) -> #simd[4]i64 {…}
     

    Copies a to result, then inserts 128 bits from b into result at the location specified by imm8.

    _mm256_lddqu_si256 ¶

    _mm256_lddqu_si256 :: proc "c" (mem_addr: ^#simd[4]i64) -> #simd[4]i64 {…}
     

    Loads 256-bits of integer data from unaligned memory into result. This intrinsic may perform better than _mm256_loadu_si256 when the data crosses a cache line boundary.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256)

    _mm256_load_pd ¶

    _mm256_load_pd :: proc "c" (mem_addr: ^f64) -> #simd[4]f64 {…}
     

    Loads 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into result. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)

    _mm256_load_ps ¶

    _mm256_load_ps :: proc "c" (mem_addr: ^f32) -> #simd[8]f32 {…}
     

    Loads 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into result. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps)

    _mm256_load_si256 ¶

    _mm256_load_si256 :: proc "c" (mem_addr: ^#simd[4]i64) -> #simd[4]i64 {…}
     

    Loads 256-bits of integer data from memory into result. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256)

    _mm256_loadu2_m128 ¶

    _mm256_loadu2_m128 :: proc "c" (hiaddr, loaddr: ^f32) -> #simd[8]f32 {…}
     

    Loads two 128-bit values (composed of 4 packed single-precision (32-bit) floating-point elements) from memory, and combines them into a 256-bit value. hiaddr and loaddr do not need to be aligned on any particular boundary.

    _mm256_loadu2_m128d ¶

    _mm256_loadu2_m128d :: proc "c" (hiaddr, loaddr: ^f64) -> #simd[4]f64 {…}
     

    Loads two 128-bit values (composed of 2 packed double-precision (64-bit) floating-point elements) from memory, and combines them into a 256-bit value. hiaddr and loaddr do not need to be aligned on any particular boundary.

    _mm256_loadu2_m128i ¶

    _mm256_loadu2_m128i :: proc "c" (hiaddr, loaddr: ^#simd[2]i64) -> #simd[4]i64 {…}
     

    Loads two 128-bit values (composed of integer data) from memory, and combines them into a 256-bit value. hiaddr and loaddr do not need to be aligned on any particular boundary.

    _mm256_loadu_pd ¶

    _mm256_loadu_pd :: proc "c" (mem_addr: ^f64) -> #simd[4]f64 {…}
     

    Loads 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into result. mem_addr does not need to be aligned on any particular boundary.

    _mm256_loadu_ps ¶

    _mm256_loadu_ps :: proc "c" (mem_addr: ^f32) -> #simd[8]f32 {…}
     

    Loads 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into result. mem_addr does not need to be aligned on any particular boundary.

    _mm256_loadu_si256 ¶

    _mm256_loadu_si256 :: proc "c" (mem_addr: ^#simd[4]i64) -> #simd[4]i64 {…}
     

    Loads 256-bits of integer data from memory into result. mem_addr does not need to be aligned on any particular boundary.

    _mm256_maskload_pd ¶

    _mm256_maskload_pd :: proc "c" (mem_addr: ^f64, mask: #simd[4]i64) -> #simd[4]f64 {…}
     

    Loads packed double-precision (64-bit) floating-point elements from memory into result using mask (elements are zeroed out when the high bit of the corresponding mask element is not set).

    _mm256_maskload_ps ¶

    _mm256_maskload_ps :: proc "c" (mem_addr: ^f32, mask: #simd[4]i64) -> #simd[8]f32 {…}
     

    Loads packed single-precision (32-bit) floating-point elements from memory into result using mask (elements are zeroed out when the high bit of the corresponding mask element is not set).

    _mm256_maskstore_pd ¶

    _mm256_maskstore_pd :: proc "c" (mem_addr: ^f64, mask: #simd[4]i64, a: #simd[4]f64) {…}
     

    Stores packed double-precision (64-bit) floating-point elements from a into memory using mask.

    _mm256_maskstore_ps ¶

    _mm256_maskstore_ps :: proc "c" (mem_addr: ^f32, mask: #simd[4]i64, a: #simd[8]f32) {…}
     

    Stores packed single-precision (32-bit) floating-point elements from a into memory using mask.

    _mm256_max_pd ¶

    _mm256_max_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Compares packed double-precision (64-bit) floating-point elements in a and b, and returns packed maximum values

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_pd)

    _mm256_max_ps ¶

    _mm256_max_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Compares packed single-precision (32-bit) floating-point elements in a and b, and returns packed maximum values

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_ps)

    _mm256_min_pd ¶

    _mm256_min_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Compares packed double-precision (64-bit) floating-point elements in a and b, and returns packed minimum values

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_pd)

    _mm256_min_ps ¶

    _mm256_min_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Compares packed single-precision (32-bit) floating-point elements in a and b, and returns packed minimum values

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_ps)

    _mm256_movedup_pd ¶

    _mm256_movedup_pd :: proc "c" (a: #simd[4]f64) -> #simd[4]f64 {…}
     

    Duplicates even-indexed double-precision (64-bit) floating-point elements from a, and returns the results.

    _mm256_movehdup_ps ¶

    _mm256_movehdup_ps :: proc "c" (a: #simd[8]f32) -> #simd[8]f32 {…}
     

    Duplicates odd-indexed single-precision (32-bit) floating-point elements from a, and returns the results.

    _mm256_moveldup_ps ¶

    _mm256_moveldup_ps :: proc "c" (a: #simd[8]f32) -> #simd[8]f32 {…}
     

    Duplicates even-indexed single-precision (32-bit) floating-point elements from a, and returns the results.

    _mm256_movemask_pd ¶

    _mm256_movemask_pd :: proc "c" (a: #simd[4]f64) -> i32 {…}
     

    Sets each bit of the returned mask based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in a.

    _mm256_movemask_ps ¶

    _mm256_movemask_ps :: proc "c" (a: #simd[8]f32) -> i32 {…}
     

    Sets each bit of the returned mask based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in a.

    _mm256_mul_pd ¶

    _mm256_mul_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Multiplies packed double-precision (64-bit) floating-point elements in a and b.

    _mm256_mul_ps ¶

    _mm256_mul_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Multiplies packed single-precision (32-bit) floating-point elements in a and b.

    _mm256_or_pd ¶

    _mm256_or_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Computes the bitwise OR of packed double-precision (64-bit) floating-point elements in a and b.

    _mm256_or_ps ¶

    _mm256_or_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Computes the bitwise OR of packed single-precision (32-bit) floating-point elements in a and b.

    _mm256_permute2f128_pd ¶

    _mm256_permute2f128_pd :: proc "c" (a, b: #simd[4]f64, $IMM8: u8) -> #simd[4]f64 {…}
     

    Shuffles 128-bit halves (each composed of 2 packed double-precision (64-bit) floating-point elements) selected by IMM8 from a and b.

    _mm256_permute2f128_ps ¶

    _mm256_permute2f128_ps :: proc "c" (a, b: #simd[8]f32, $IMM8: u8) -> #simd[8]f32 {…}
     

    Shuffles 128-bit halves (each composed of 4 packed single-precision (32-bit) floating-point elements) selected by IMM8 from a and b.

    _mm256_permute2f128_si256 ¶

    _mm256_permute2f128_si256 :: proc "c" (a, b: #simd[4]i64, $IMM8: u8) -> #simd[4]i64 {…}
     

    Shuffles 128-bits (composed of integer data) selected by imm8 from a and b.

    _mm256_permute_pd ¶

    _mm256_permute_pd :: proc "c" (a: #simd[4]f64, $IMM4: u8) -> #simd[4]f64 {…}
     

    Shuffles double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in IMM4.

    _mm256_permute_ps ¶

    _mm256_permute_ps :: proc "c" (a: #simd[8]f32, $IMM8: u8) -> #simd[8]f32 {…}
     

    Shuffles single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8.

    _mm256_permutevar_pd ¶

    _mm256_permutevar_pd :: proc "c" (a: #simd[4]f64, b: #simd[4]i64) -> #simd[4]f64 {…}
     

    Shuffles double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_pd)

    _mm256_permutevar_ps ¶

    _mm256_permutevar_ps :: proc "c" (a: #simd[8]f32, b: #simd[4]i64) -> #simd[8]f32 {…}
     

    Shuffles single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_ps)

    _mm256_rcp_ps ¶

    _mm256_rcp_ps :: proc "c" (a: #simd[8]f32) -> #simd[8]f32 {…}
     

    Computes the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and returns the results. The maximum relative error for this approximation is less than 1.5*2^-12.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp_ps)

    _mm256_round_pd ¶

    _mm256_round_pd :: proc "c" (a: #simd[4]f64, $ROUNDING: u8) -> #simd[4]f64 {…}
     

    Rounds packed double-precision (64-bit) floating point elements in a according to the flag ROUNDING. The value of ROUNDING may be as follows:

    0x00: Round to the nearest whole number. 0x01: Round down, toward negative infinity. 0x02: Round up, toward positive infinity. 0x03: Truncate the values.

    For a complete list of options, check [the LLVM docs][llvm_docs].

    [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_pd)

    _mm256_round_ps ¶

    _mm256_round_ps :: proc(a: #simd[8]f32, $ROUNDING: u8) -> #simd[8]f32 {…}
     

    Rounds packed single-precision (32-bit) floating point elements in a according to the flag ROUNDING. The value of ROUNDING may be as follows:

    0x00: Round to the nearest whole number. 0x01: Round down, toward negative infinity. 0x02: Round up, toward positive infinity. 0x03: Truncate the values.

    For a complete list of options, check [the LLVM docs][llvm_docs].

    [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_ps)

    _mm256_rsqrt_ps ¶

    _mm256_rsqrt_ps :: proc "c" (a: #simd[8]f32) -> #simd[8]f32 {…}
     

    Computes the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and returns the results. The maximum relative error for this approximation is less than 1.5*2^-12.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt_ps)

    _mm256_set1_epi16 ¶

    _mm256_set1_epi16 :: proc "c" (a: i16) -> #simd[4]i64 {…}
     

    Broadcasts 16-bit integer a to all elements of returned vector. This intrinsic may generate the vpbroadcastw instruction.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16)

    _mm256_set1_epi32 ¶

    _mm256_set1_epi32 :: proc "c" (a: i32) -> #simd[4]i64 {…}
     

    Broadcasts 32-bit integer a to all elements of returned vector. This intrinsic may generate the vpbroadcastd instruction.

    _mm256_set1_epi64x ¶

    _mm256_set1_epi64x :: proc "c" (a: i64) -> #simd[4]i64 {…}
     

    Broadcasts 64-bit integer a to all elements of returned vector. This intrinsic may generate the vpbroadcastq instruction.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x)

    _mm256_set1_epi8 ¶

    _mm256_set1_epi8 :: proc "c" (a: i8) -> #simd[4]i64 {…}
     

    Broadcasts 8-bit integer a to all elements of returned vector. This intrinsic may generate the vpbroadcastb instruction.

    _mm256_set1_pd ¶

    _mm256_set1_pd :: proc "c" (a: f64) -> #simd[4]f64 {…}
     

    Broadcasts double-precision (64-bit) floating-point value a to all elements of returned vector.

    _mm256_set1_ps ¶

    _mm256_set1_ps :: proc "c" (a: f32) -> #simd[8]f32 {…}
     

    Broadcasts single-precision (32-bit) floating-point value a to all elements of returned vector.

    _mm256_set_epi16 ¶

    _mm256_set_epi16 :: proc "c" (
    	e00, e01, e02, e03, e04, e05, e06, e07: i16, 
    	e08, e09, e10, e11, e12, e13, e14, 
    	e15:                                    i16, 
    ) -> #simd[4]i64 {…}
     

    Sets packed 16-bit integers in returned vector with the supplied values.

    _mm256_set_epi32 ¶

    _mm256_set_epi32 :: proc "c" (
    	e0, e1, e2, e3, e4, e5, e6, 
    	e7:                         i32, 
    ) -> #simd[4]i64 {…}
     

    Sets packed 32-bit integers in returned vector with the supplied values.

    _mm256_set_epi64x ¶

    _mm256_set_epi64x :: proc "c" (a: i64, b: i64, c: i64, d: i64) -> #simd[4]i64 {…}
     

    Sets packed 64-bit integers in returned vector with the supplied values.

    _mm256_set_epi8 ¶

    _mm256_set_epi8 :: proc "c" (
    	e00, e01, e02, e03, e04, e05, e06, e07: i8, 
    	e08, e09, e10, e11, e12, e13, e14, e15: i8, 
    	e16, e17, e18, e19, e20, e21, e22, e23: i8, 
    	e24, e25, e26, e27, e28, e29, e30, 
    	e31:                                    i8, 
    ) -> #simd[4]i64 {…}
     

    Sets packed 8-bit integers in returned vector with the supplied values.

    _mm256_set_m128 ¶

    _mm256_set_m128 :: proc "c" (hi: #simd[4]f32, lo: #simd[4]f32) -> #simd[8]f32 {…}
     

    Builds the returned __m256 vector from two 128-bit halves: hi supplies the upper 128 bits and lo supplies the lower 128 bits.

    _mm256_set_m128d ¶

    _mm256_set_m128d :: proc "c" (hi: #simd[2]f64, lo: #simd[2]f64) -> #simd[4]f64 {…}
     

    Builds the returned __m256d vector from two 128-bit halves: hi supplies the upper 128 bits and lo supplies the lower 128 bits.

    _mm256_set_m128i ¶

    _mm256_set_m128i :: proc "c" (hi: #simd[2]i64, lo: #simd[2]i64) -> #simd[4]i64 {…}
     

    Builds the returned __m256i vector from two 128-bit halves: hi supplies the upper 128 bits and lo supplies the lower 128 bits.

    _mm256_set_pd ¶

    _mm256_set_pd :: proc "c" (a: f64, b: f64, c: f64, d: f64) -> #simd[4]f64 {…}
     

    Sets packed double-precision (64-bit) floating-point elements in returned vector with the supplied values.

    _mm256_set_ps ¶

    _mm256_set_ps :: proc "c" (
    	a, b, c, d, e, f, g, 
    	h:                   f32, 
    ) -> #simd[8]f32 {…}
     

    Sets packed single-precision (32-bit) floating-point elements in returned vector with the supplied values.

    _mm256_setr_epi16 ¶

    _mm256_setr_epi16 :: proc "c" (
    	e00, e01, e02, e03, e04, e05, e06, e07: i16, 
    	e08, e09, e10, e11, e12, e13, e14, 
    	e15:                                    i16, 
    ) -> #simd[4]i64 {…}
     

    Sets packed 16-bit integers in returned vector with the supplied values in reverse order.

    _mm256_setr_epi32 ¶

    _mm256_setr_epi32 :: proc "c" (
    	e0, e1, e2, e3, e4, e5, e6, 
    	e7:                         i32, 
    ) -> #simd[4]i64 {…}
     

    Sets packed 32-bit integers in returned vector with the supplied values in reverse order.

    _mm256_setr_epi64x ¶

    _mm256_setr_epi64x :: proc "c" (a: i64, b: i64, c: i64, d: i64) -> #simd[4]i64 {…}
     

    Sets packed 64-bit integers in returned vector with the supplied values in reverse order.

    _mm256_setr_epi8 ¶

    _mm256_setr_epi8 :: proc "c" (
    	e00, e01, e02, e03, e04, e05, e06, e07: i8, 
    	e08, e09, e10, e11, e12, e13, e14, e15: i8, 
    	e16, e17, e18, e19, e20, e21, e22, e23: i8, 
    	e24, e25, e26, e27, e28, e29, e30, 
    	e31:                                    i8, 
    ) -> #simd[4]i64 {…}
     

    Sets packed 8-bit integers in returned vector with the supplied values in reverse order.

    _mm256_setr_m128 ¶

    _mm256_setr_m128 :: proc "c" (lo: #simd[4]f32, hi: #simd[4]f32) -> #simd[8]f32 {…}
     

    Builds the returned __m256 vector from two 128-bit halves: lo supplies the lower 128 bits and hi supplies the upper 128 bits.

    _mm256_setr_m128d ¶

    _mm256_setr_m128d :: proc "c" (lo: #simd[2]f64, hi: #simd[2]f64) -> #simd[4]f64 {…}
     

    Builds the returned __m256d vector from two 128-bit halves: lo supplies the lower 128 bits and hi supplies the upper 128 bits.

    _mm256_setr_m128i ¶

    _mm256_setr_m128i :: proc "c" (lo: #simd[2]i64, hi: #simd[2]i64) -> #simd[4]i64 {…}
     

    Builds the returned __m256i vector from two 128-bit halves: lo supplies the lower 128 bits and hi supplies the upper 128 bits.

    _mm256_setr_pd ¶

    _mm256_setr_pd :: proc "c" (a: f64, b: f64, c: f64, d: f64) -> #simd[4]f64 {…}
     

    Sets packed double-precision (64-bit) floating-point elements in returned vector with the supplied values in reverse order.

    _mm256_setr_ps ¶

    _mm256_setr_ps :: proc "c" (
    	a, b, c, d, e, f, g, 
    	h:                   f32, 
    ) -> #simd[8]f32 {…}
     

    Sets packed single-precision (32-bit) floating-point elements in returned vector with the supplied values in reverse order.

    _mm256_setzero_pd ¶

    _mm256_setzero_pd :: proc "c" () -> #simd[4]f64 {…}
     

    Returns vector of type __m256d with all elements set to zero.

    _mm256_setzero_ps ¶

    _mm256_setzero_ps :: proc "c" () -> #simd[8]f32 {…}
     

    Returns vector of type __m256 with all elements set to zero.

    _mm256_setzero_si256 ¶

    _mm256_setzero_si256 :: proc "c" () -> #simd[4]i64 {…}
     

    Returns vector of type __m256i with all elements set to zero.

    _mm256_shuffle_pd ¶

    _mm256_shuffle_pd :: proc "c" (a, b: #simd[4]f64, $MASK: u8) -> #simd[4]f64 {…}
     

    Shuffles double-precision (64-bit) floating-point elements within 128-bit lanes using the control in MASK.

    _mm256_shuffle_ps ¶

    _mm256_shuffle_ps :: proc "c" (a, b: #simd[8]f32, $MASK: u8) -> #simd[8]f32 {…}
     

    Shuffles single-precision (32-bit) floating-point elements in a and b within 128-bit lanes using the control in MASK.

    _mm256_sqrt_pd ¶

    _mm256_sqrt_pd :: proc "c" (a: #simd[4]f64) -> #simd[4]f64 {…}
     

    Returns the square root of packed double-precision (64-bit) floating point elements in a.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_pd)

    _mm256_sqrt_ps ¶

    _mm256_sqrt_ps :: proc "c" (a: #simd[8]f32) -> #simd[8]f32 {…}
     

    Returns the square root of packed single-precision (32-bit) floating point elements in a.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_ps)

    _mm256_store_pd ¶

    _mm256_store_pd :: proc "c" (mem_addr: ^f64, a: #simd[4]f64) {…}
     

    Stores 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)

    _mm256_store_ps ¶

    _mm256_store_ps :: proc "c" (mem_addr: ^f32, a: #simd[8]f32) {…}
     

    Stores 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps)

    _mm256_store_si256 ¶

    _mm256_store_si256 :: proc "c" (mem_addr: ^#simd[4]i64, a: #simd[4]i64) {…}
     

    Stores 256-bits of integer data from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256)

    _mm256_storeu2_m128 ¶

    _mm256_storeu2_m128 :: proc "c" (hiaddr, loaddr: ^f32, a: #simd[8]f32) {…}
     

    Stores the high and low 128-bit halves (each composed of 4 packed single-precision (32-bit) floating-point elements) from a into memory at two different 128-bit locations. hiaddr and loaddr do not need to be aligned on any particular boundary.

    _mm256_storeu2_m128d ¶

    _mm256_storeu2_m128d :: proc "c" (hiaddr, loaddr: ^f64, a: #simd[4]f64) {…}
     

    Stores the high and low 128-bit halves (each composed of 2 packed double-precision (64-bit) floating-point elements) from a into memory at two different 128-bit locations. hiaddr and loaddr do not need to be aligned on any particular boundary.

    _mm256_storeu2_m128i ¶

    _mm256_storeu2_m128i :: proc "c" (hiaddr, loaddr: ^#simd[2]i64, a: #simd[4]i64) {…}
     

    Stores the high and low 128-bit halves (each composed of integer data) from a into memory at two different 128-bit locations. hiaddr and loaddr do not need to be aligned on any particular boundary.

    _mm256_storeu_pd ¶

    _mm256_storeu_pd :: proc "c" (mem_addr: ^f64, a: #simd[4]f64) {…}
     

    Stores 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr does not need to be aligned on any particular boundary.

    _mm256_storeu_ps ¶

    _mm256_storeu_ps :: proc "c" (mem_addr: ^f32, a: #simd[8]f32) {…}
     

    Stores 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr does not need to be aligned on any particular boundary.

    _mm256_storeu_si256 ¶

    _mm256_storeu_si256 :: proc "c" (mem_addr: ^#simd[4]i64, a: #simd[4]i64) {…}
     

    Stores 256-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.

    _mm256_sub_pd ¶

    _mm256_sub_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Subtracts packed double-precision (64-bit) floating-point elements in b from packed elements in a.

    _mm256_sub_ps ¶

    _mm256_sub_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Subtracts packed single-precision (32-bit) floating-point elements in b from packed elements in a.

    _mm256_testc_pd ¶

    _mm256_testc_pd :: proc "c" (a, b: #simd[4]f64) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in a and b, producing an intermediate 256-bit value, and set ZF to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set CF to 0. Return the CF value.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_pd)

    _mm256_testc_ps ¶

    _mm256_testc_ps :: proc "c" (a, b: #simd[8]f32) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in a and b, producing an intermediate 256-bit value, and set ZF to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set CF to 0. Return the CF value.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_ps)

    _mm256_testc_si256 ¶

    _mm256_testc_si256 :: proc "c" (a, b: #simd[4]i64) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing integer data) in a and b, and set ZF to 1 if the result is zero, otherwise set ZF to 0. Computes the bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, otherwise set CF to 0. Return the CF value.

    _mm256_testnzc_pd ¶

    _mm256_testnzc_pd :: proc "c" (a, b: #simd[4]f64) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in a and b, producing an intermediate 256-bit value, and set ZF to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set CF to 0. Return 1 if both the ZF and CF values are zero, otherwise return 0.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_pd)

    _mm256_testnzc_ps ¶

    _mm256_testnzc_ps :: proc "c" (a, b: #simd[8]f32) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in a and b, producing an intermediate 256-bit value, and set ZF to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set CF to 0. Return 1 if both the ZF and CF values are zero, otherwise return 0.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_ps)

    _mm256_testnzc_si256 ¶

    _mm256_testnzc_si256 :: proc "c" (a, b: #simd[4]i64) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing integer data) in a and b, and set ZF to 1 if the result is zero, otherwise set ZF to 0. Computes the bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, otherwise set CF to 0. Return 1 if both the ZF and CF values are zero, otherwise return 0.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_si256)

    _mm256_testz_pd ¶

    _mm256_testz_pd :: proc "c" (a, b: #simd[4]f64) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing double-precision (64-bit) floating-point elements) in a and b, producing an intermediate 256-bit value, and set ZF to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 64-bit element in the intermediate value is zero, otherwise set CF to 0. Return the ZF value.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_pd)

    _mm256_testz_ps ¶

    _mm256_testz_ps :: proc "c" (a, b: #simd[8]f32) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing single-precision (32-bit) floating-point elements) in a and b, producing an intermediate 256-bit value, and set ZF to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 32-bit element in the intermediate value is zero, otherwise set CF to 0. Return the ZF value.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_ps)

    _mm256_testz_si256 ¶

    _mm256_testz_si256 :: proc "c" (a, b: #simd[4]i64) -> i32 {…}
     

    Computes the bitwise AND of 256 bits (representing integer data) in a and b, and set ZF to 1 if the result is zero, otherwise set ZF to 0. Computes the bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, otherwise set CF to 0. Return the ZF value.

    _mm256_undefined_pd ¶

    _mm256_undefined_pd :: proc "c" () -> #simd[4]f64 {…}
     

    Returns vector of type __m256d with indeterminate elements. Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically picks some valid value; the result is not uninitialized memory.

    _mm256_undefined_ps ¶

    _mm256_undefined_ps :: proc "c" () -> #simd[8]f32 {…}
     

    Returns vector of type __m256 with indeterminate elements. Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically picks some valid value; the result is not uninitialized memory.

    _mm256_undefined_si256 ¶

    _mm256_undefined_si256 :: proc "c" () -> #simd[4]i64 {…}
     

    Returns vector of type __m256i with indeterminate elements. Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically picks some valid value; the result is not uninitialized memory.

    _mm256_unpackhi_pd ¶

    _mm256_unpackhi_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Unpacks and interleaves double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b.

    _mm256_unpackhi_ps ¶

    _mm256_unpackhi_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Unpacks and interleaves single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b.

    _mm256_unpacklo_pd ¶

    _mm256_unpacklo_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Unpacks and interleaves double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b.

    _mm256_unpacklo_ps ¶

    _mm256_unpacklo_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Unpacks and interleaves single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b.

    _mm256_xor_pd ¶

    _mm256_xor_pd :: proc "c" (a, b: #simd[4]f64) -> #simd[4]f64 {…}
     

    Computes the bitwise XOR of packed double-precision (64-bit) floating-point elements in a and b.

    _mm256_xor_ps ¶

    _mm256_xor_ps :: proc "c" (a, b: #simd[8]f32) -> #simd[8]f32 {…}
     

    Computes the bitwise XOR of packed single-precision (32-bit) floating-point elements in a and b.

    _mm256_zeroall ¶

    _mm256_zeroall :: proc "c" () {…}
     

    Zeroes the contents of all XMM or YMM registers.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)

    _mm256_zeroupper ¶

    _mm256_zeroupper :: proc "c" () {…}
     

    Zeroes the upper 128 bits of all YMM registers; the lower 128-bits of the registers are unmodified.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)

    _mm256_zextpd128_pd256 ¶

    _mm256_zextpd128_pd256 :: proc "c" (a: #simd[2]f64) -> #simd[4]f64 {…}
     

    Constructs a 256-bit floating-point vector of [4 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.

    _mm256_zextps128_ps256 ¶

    _mm256_zextps128_ps256 :: proc "c" (a: #simd[4]f32) -> #simd[8]f32 {…}
     

    Constructs a 256-bit floating-point vector of [8 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.

    _mm256_zextsi128_si256 ¶

    _mm256_zextsi128_si256 :: proc "c" (a: #simd[2]i64) -> #simd[4]i64 {…}
     

    Constructs a 256-bit integer vector from a 128-bit integer vector. The lower 128 bits contain the value of the source vector. The upper 128 bits are set to zero.

    _mm_abs_epi16 ¶

    _mm_abs_epi16 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_abs_epi32 ¶

    _mm_abs_epi32 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_abs_epi8 ¶

    _mm_abs_epi8 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_add_epi16 ¶

    _mm_add_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_add_epi32 ¶

    _mm_add_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_add_epi64 ¶

    _mm_add_epi64 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_add_epi8 ¶

    _mm_add_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_add_pd ¶

    _mm_add_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_add_ps ¶

    _mm_add_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_add_sd ¶

    _mm_add_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_add_ss ¶

    _mm_add_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_adds_epi16 ¶

    _mm_adds_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_adds_epi8 ¶

    _mm_adds_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_adds_epu16 ¶

    _mm_adds_epu16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_adds_epu8 ¶

    _mm_adds_epu8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_addsub_pd ¶

    _mm_addsub_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_addsub_ps ¶

    _mm_addsub_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_aesdec_si128 ¶

    _mm_aesdec_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_aesdeclast_si128 ¶

    _mm_aesdeclast_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_aesenc_si128 ¶

    _mm_aesenc_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_aesenclast_si128 ¶

    _mm_aesenclast_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_aesimc_si128 ¶

    _mm_aesimc_si128 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_aeskeygenassist_si128 ¶

    _mm_aeskeygenassist_si128 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_alignr_epi8 ¶

    _mm_alignr_epi8 :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> #simd[2]i64 {…}

    _mm_and_pd ¶

    _mm_and_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_and_ps ¶

    _mm_and_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_and_si128 ¶

    _mm_and_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_andnot_pd ¶

    _mm_andnot_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_andnot_ps ¶

    _mm_andnot_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_andnot_si128 ¶

    _mm_andnot_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_avg_epu16 ¶

    _mm_avg_epu16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_avg_epu8 ¶

    _mm_avg_epu8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_blend_epi16 ¶

    _mm_blend_epi16 :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> #simd[2]i64 {…}

    _mm_blend_pd ¶

    _mm_blend_pd :: proc "c" (a, b: #simd[2]f64, $IMM2: u8) -> #simd[2]f64 {…}

    _mm_blend_ps ¶

    _mm_blend_ps :: proc "c" (a, b: #simd[4]f32, $IMM4: u8) -> #simd[4]f32 {…}

    _mm_blendv_epi8 ¶

    _mm_blendv_epi8 :: proc "c" (a, b, mask: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_blendv_pd ¶

    _mm_blendv_pd :: proc "c" (a, b, mask: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_blendv_ps ¶

    _mm_blendv_ps :: proc "c" (a, b, mask: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_broadcast_ss ¶

    _mm_broadcast_ss :: proc "c" (f: ^f32) -> #simd[4]f32 {…}
     

    Broadcasts a single-precision (32-bit) floating-point element from memory to all elements of the returned vector.

    _mm_bslli_si128 ¶

    _mm_bslli_si128 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_bsrli_si128 ¶

    _mm_bsrli_si128 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_castpd_ps ¶

    _mm_castpd_ps :: proc "c" (a: #simd[2]f64) -> #simd[4]f32 {…}

    _mm_castpd_si128 ¶

    _mm_castpd_si128 :: proc "c" (a: #simd[2]f64) -> #simd[2]i64 {…}

    _mm_castps_pd ¶

    _mm_castps_pd :: proc "c" (a: #simd[4]f32) -> #simd[2]f64 {…}

    _mm_castps_si128 ¶

    _mm_castps_si128 :: proc "c" (a: #simd[4]f32) -> #simd[2]i64 {…}

    _mm_castsi128_pd ¶

    _mm_castsi128_pd :: proc "c" (a: #simd[2]i64) -> #simd[2]f64 {…}

    _mm_castsi128_ps ¶

    _mm_castsi128_ps :: proc "c" (a: #simd[2]i64) -> #simd[4]f32 {…}

    _mm_ceil_pd ¶

    _mm_ceil_pd :: proc "c" (a: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_ceil_ps ¶

    _mm_ceil_ps :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_ceil_sd ¶

    _mm_ceil_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_ceil_ss ¶

    _mm_ceil_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_clflush ¶

    _mm_clflush :: proc "c" (p: rawptr) {…}

    _mm_clmulepi64_si128 ¶

    _mm_clmulepi64_si128 :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> #simd[2]i64 {…}

    _mm_cmp_pd ¶

    _mm_cmp_pd :: proc "c" (a, b: #simd[2]f64, $IMM5: u8) -> #simd[2]f64 {…}
     

    Compares packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by IMM5.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd)

    _mm_cmp_ps ¶

    _mm_cmp_ps :: proc "c" (a: #simd[4]f32, b: #simd[4]f32, $IMM5: u8) -> #simd[4]f32 {…}
     

    Compares packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by IMM5.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps)

    _mm_cmp_sd ¶

    _mm_cmp_sd :: proc "c" (a, b: #simd[2]f64, $IMM5: u8) -> #simd[2]f64 {…}
     

    Compares the lower double-precision (64-bit) floating-point elements of a and b based on the comparison operand specified by IMM5, stores the result in the lower element of the returned vector, and copies the upper element from a to the upper element of the returned vector.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd)

    _mm_cmp_ss ¶

    _mm_cmp_ss :: proc "c" (a: #simd[4]f32, b: #simd[4]f32, $IMM5: u8) -> #simd[4]f32 {…}
     

    Compares the lower single-precision (32-bit) floating-point elements of a and b based on the comparison operand specified by IMM5, stores the result in the lower element of the returned vector, and copies the upper 3 packed elements from a to the upper elements of the returned vector.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss)

    _mm_cmpeq_epi16 ¶

    _mm_cmpeq_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpeq_epi32 ¶

    _mm_cmpeq_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpeq_epi64 ¶

    _mm_cmpeq_epi64 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpeq_epi8 ¶

    _mm_cmpeq_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpeq_pd ¶

    _mm_cmpeq_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpeq_ps ¶

    _mm_cmpeq_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpeq_sd ¶

    _mm_cmpeq_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpeq_ss ¶

    _mm_cmpeq_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpestra ¶

    _mm_cmpestra :: proc "c" (a: #simd[2]i64, la: i32, b: #simd[2]i64, lb: i32, $IMM8: i8) -> i32 {…}

    _mm_cmpestrc ¶

    _mm_cmpestrc :: proc "c" (a: #simd[2]i64, la: i32, b: #simd[2]i64, lb: i32, $IMM8: i8) -> i32 {…}

    _mm_cmpestri ¶

    _mm_cmpestri :: proc "c" (a: #simd[2]i64, la: i32, b: #simd[2]i64, lb: i32, $IMM8: i8) -> i32 {…}

    _mm_cmpestrm ¶

    _mm_cmpestrm :: proc "c" (a: #simd[2]i64, la: i32, b: #simd[2]i64, lb: i32, $IMM8: i8) -> #simd[2]i64 {…}

    _mm_cmpestro ¶

    _mm_cmpestro :: proc "c" (a: #simd[2]i64, la: i32, b: #simd[2]i64, lb: i32, $IMM8: i8) -> i32 {…}

    _mm_cmpestrs ¶

    _mm_cmpestrs :: proc "c" (a: #simd[2]i64, la: i32, b: #simd[2]i64, lb: i32, $IMM8: i8) -> i32 {…}

    _mm_cmpestrz ¶

    _mm_cmpestrz :: proc "c" (a: #simd[2]i64, la: i32, b: #simd[2]i64, lb: i32, $IMM8: i8) -> i32 {…}

    _mm_cmpge_pd ¶

    _mm_cmpge_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpge_ps ¶

    _mm_cmpge_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpge_sd ¶

    _mm_cmpge_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpge_ss ¶

    _mm_cmpge_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpgt_epi16 ¶

    _mm_cmpgt_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpgt_epi32 ¶

    _mm_cmpgt_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpgt_epi64 ¶

    _mm_cmpgt_epi64 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpgt_epi8 ¶

    _mm_cmpgt_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmpgt_pd ¶

    _mm_cmpgt_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpgt_ps ¶

    _mm_cmpgt_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpgt_sd ¶

    _mm_cmpgt_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpgt_ss ¶

    _mm_cmpgt_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpistra ¶

    _mm_cmpistra :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> i32 {…}

    _mm_cmpistrc ¶

    _mm_cmpistrc :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> i32 {…}

    _mm_cmpistri ¶

    _mm_cmpistri :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> i32 {…}

    _mm_cmpistrm ¶

    _mm_cmpistrm :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> #simd[2]i64 {…}

    _mm_cmpistro ¶

    _mm_cmpistro :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> i32 {…}

    _mm_cmpistrs ¶

    _mm_cmpistrs :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> i32 {…}

    _mm_cmpistrz ¶

    _mm_cmpistrz :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> i32 {…}

    _mm_cmple_pd ¶

    _mm_cmple_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmple_ps ¶

    _mm_cmple_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmple_sd ¶

    _mm_cmple_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmple_ss ¶

    _mm_cmple_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmplt_epi16 ¶

    _mm_cmplt_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmplt_epi32 ¶

    _mm_cmplt_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmplt_epi8 ¶

    _mm_cmplt_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cmplt_pd ¶

    _mm_cmplt_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmplt_ps ¶

    _mm_cmplt_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmplt_sd ¶

    _mm_cmplt_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmplt_ss ¶

    _mm_cmplt_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpneq_pd ¶

    _mm_cmpneq_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpneq_ps ¶

    _mm_cmpneq_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpneq_sd ¶

    _mm_cmpneq_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpneq_ss ¶

    _mm_cmpneq_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpnge_pd ¶

    _mm_cmpnge_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpnge_ps ¶

    _mm_cmpnge_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpnge_sd ¶

    _mm_cmpnge_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpnge_ss ¶

    _mm_cmpnge_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpngt_pd ¶

    _mm_cmpngt_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpngt_ps ¶

    _mm_cmpngt_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpngt_sd ¶

    _mm_cmpngt_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpngt_ss ¶

    _mm_cmpngt_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpnle_pd ¶

    _mm_cmpnle_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpnle_ps ¶

    _mm_cmpnle_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpnle_sd ¶

    _mm_cmpnle_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpnle_ss ¶

    _mm_cmpnle_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpnlt_pd ¶

    _mm_cmpnlt_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpnlt_ps ¶

    _mm_cmpnlt_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpnlt_sd ¶

    _mm_cmpnlt_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpnlt_ss ¶

    _mm_cmpnlt_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpord_pd ¶

    _mm_cmpord_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpord_ps ¶

    _mm_cmpord_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpord_sd ¶

    _mm_cmpord_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpord_ss ¶

    _mm_cmpord_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpunord_pd ¶

    _mm_cmpunord_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpunord_ps ¶

    _mm_cmpunord_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_cmpunord_sd ¶

    _mm_cmpunord_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_cmpunord_ss ¶

    _mm_cmpunord_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_comieq_sd ¶

    _mm_comieq_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_comieq_ss ¶

    _mm_comieq_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_comige_sd ¶

    _mm_comige_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_comige_ss ¶

    _mm_comige_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_comigt_sd ¶

    _mm_comigt_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_comigt_ss ¶

    _mm_comigt_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_comile_sd ¶

    _mm_comile_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_comile_ss ¶

    _mm_comile_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_comilt_sd ¶

    _mm_comilt_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_comilt_ss ¶

    _mm_comilt_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_comineq_sd ¶

    _mm_comineq_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_comineq_ss ¶

    _mm_comineq_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_crc32_u16 ¶

    _mm_crc32_u16 :: proc "c" (crc: u32, v: u16) -> u32 {…}

    _mm_crc32_u32 ¶

    _mm_crc32_u32 :: proc "c" (crc: u32, v: u32) -> u32 {…}

    _mm_crc32_u64 ¶

    _mm_crc32_u64 :: proc "c" (crc: u64, v: u64) -> u64 {…}

    _mm_crc32_u8 ¶

    _mm_crc32_u8 :: proc "c" (crc: u32, v: u8) -> u32 {…}

    _mm_cvtepi16_epi32 ¶

    _mm_cvtepi16_epi32 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepi16_epi64 ¶

    _mm_cvtepi16_epi64 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepi32_epi64 ¶

    _mm_cvtepi32_epi64 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepi32_pd ¶

    _mm_cvtepi32_pd :: proc "c" (a: #simd[2]i64) -> #simd[2]f64 {…}

    _mm_cvtepi32_ps ¶

    _mm_cvtepi32_ps :: proc "c" (a: #simd[2]i64) -> #simd[4]f32 {…}

    _mm_cvtepi8_epi16 ¶

    _mm_cvtepi8_epi16 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepi8_epi32 ¶

    _mm_cvtepi8_epi32 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepi8_epi64 ¶

    _mm_cvtepi8_epi64 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepu16_epi32 ¶

    _mm_cvtepu16_epi32 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepu16_epi64 ¶

    _mm_cvtepu16_epi64 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepu32_epi64 ¶

    _mm_cvtepu32_epi64 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepu8_epi16 ¶

    _mm_cvtepu8_epi16 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepu8_epi32 ¶

    _mm_cvtepu8_epi32 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtepu8_epi64 ¶

    _mm_cvtepu8_epi64 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_cvtpd_epi32 ¶

    _mm_cvtpd_epi32 :: proc "c" (a: #simd[2]f64) -> #simd[2]i64 {…}

    _mm_cvtpd_ps ¶

    _mm_cvtpd_ps :: proc "c" (a: #simd[2]f64) -> #simd[4]f32 {…}

    _mm_cvtps_epi32 ¶

    _mm_cvtps_epi32 :: proc "c" (a: #simd[4]f32) -> #simd[2]i64 {…}

    _mm_cvtps_pd ¶

    _mm_cvtps_pd :: proc "c" (a: #simd[4]f32) -> #simd[2]f64 {…}

    _mm_cvtsd_f64 ¶

    _mm_cvtsd_f64 :: proc "c" (a: #simd[2]f64) -> f64 {…}

    _mm_cvtsd_si32 ¶

    _mm_cvtsd_si32 :: proc "c" (a: #simd[2]f64) -> i32 {…}

    _mm_cvtsd_si64 ¶

    _mm_cvtsd_si64 :: proc "c" (a: #simd[2]f64) -> i64 {…}

    _mm_cvtsd_si64x ¶

    _mm_cvtsd_si64x :: proc "c" (a: #simd[2]f64) -> i64 {…}

    _mm_cvtsd_ss ¶

    _mm_cvtsd_ss :: proc "c" (a, b: #simd[2]f64) -> #simd[4]f32 {…}

    _mm_cvtsi128_si32 ¶

    _mm_cvtsi128_si32 :: proc "c" (a: #simd[2]i64) -> i32 {…}

    _mm_cvtsi128_si64 ¶

    _mm_cvtsi128_si64 :: proc "c" (a: #simd[2]i64) -> i64 {…}

    _mm_cvtsi128_si64x ¶

    _mm_cvtsi128_si64x :: proc "c" (a: #simd[2]i64) -> i64 {…}

    _mm_cvtsi32_sd ¶

    _mm_cvtsi32_sd :: proc "c" (a: #simd[2]f64, b: i32) -> #simd[2]f64 {…}

    _mm_cvtsi32_si128 ¶

    _mm_cvtsi32_si128 :: proc "c" (a: i32) -> #simd[2]i64 {…}

    _mm_cvtsi32_ss ¶

    _mm_cvtsi32_ss :: proc "c" (a: #simd[4]f32, b: i32) -> #simd[4]f32 {…}

    _mm_cvtsi64_sd ¶

    _mm_cvtsi64_sd :: proc "c" (a: #simd[2]f64, b: i64) -> #simd[2]f64 {…}

    _mm_cvtsi64_si128 ¶

    _mm_cvtsi64_si128 :: proc "c" (a: i64) -> #simd[2]i64 {…}

    _mm_cvtsi64_ss ¶

    _mm_cvtsi64_ss :: proc "c" (a: #simd[4]f32, b: i64) -> #simd[4]f32 {…}

    _mm_cvtsi64x_sd ¶

    _mm_cvtsi64x_sd :: proc "c" (a: #simd[2]f64, b: i64) -> #simd[2]f64 {…}

    _mm_cvtsi64x_si128 ¶

    _mm_cvtsi64x_si128 :: proc "c" (a: i64) -> #simd[2]i64 {…}

    _mm_cvtss_f32 ¶

    _mm_cvtss_f32 :: proc "c" (a: #simd[4]f32) -> f32 {…}

    _mm_cvtss_sd ¶

    _mm_cvtss_sd :: proc "c" (a, b: #simd[4]f32) -> #simd[2]f64 {…}

    _mm_cvtss_si32 ¶

    _mm_cvtss_si32 :: proc "c" (a: #simd[4]f32) -> i32 {…}

    _mm_cvtss_si64 ¶

    _mm_cvtss_si64 :: proc "c" (a: #simd[4]f32) -> i64 {…}

    _mm_cvttpd_epi32 ¶

    _mm_cvttpd_epi32 :: proc "c" (a: #simd[2]f64) -> #simd[2]i64 {…}

    _mm_cvttps_epi32 ¶

    _mm_cvttps_epi32 :: proc "c" (a: #simd[4]f32) -> #simd[2]i64 {…}

    _mm_cvttsd_si32 ¶

    _mm_cvttsd_si32 :: proc "c" (a: #simd[2]f64) -> i32 {…}

    _mm_cvttsd_si64 ¶

    _mm_cvttsd_si64 :: proc "c" (a: #simd[2]f64) -> i64 {…}

    _mm_cvttsd_si64x ¶

    _mm_cvttsd_si64x :: proc "c" (a: #simd[2]f64) -> i64 {…}

    _mm_cvttss_si64 ¶

    _mm_cvttss_si64 :: proc "c" (a: #simd[4]f32) -> i64 {…}

    _mm_div_pd ¶

    _mm_div_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_div_ps ¶

    _mm_div_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_div_sd ¶

    _mm_div_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_div_ss ¶

    _mm_div_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_dp_pd ¶

    _mm_dp_pd :: proc "c" (a, b: #simd[2]f64, $IMM8: u8) -> #simd[2]f64 {…}

    _mm_dp_ps ¶

    _mm_dp_ps :: proc "c" (a, b: #simd[4]f32, $IMM8: u8) -> #simd[4]f32 {…}

    _mm_extract_epi16 ¶

    _mm_extract_epi16 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> i32 {…}

    _mm_extract_epi32 ¶

    _mm_extract_epi32 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> i32 {…}

    _mm_extract_epi64 ¶

    _mm_extract_epi64 :: proc "c" (a: #simd[2]i64, $IMM1: u32) -> i64 {…}

    _mm_extract_epi8 ¶

    _mm_extract_epi8 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> i32 {…}

    _mm_extract_ps ¶

    _mm_extract_ps :: proc "c" (a: #simd[4]f32, $IMM8: u32) -> i32 {…}

    _mm_floor_pd ¶

    _mm_floor_pd :: proc "c" (a: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_floor_ps ¶

    _mm_floor_ps :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_floor_sd ¶

    _mm_floor_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_floor_ss ¶

    _mm_floor_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_getcsr ¶

    _mm_getcsr :: proc "c" () -> (result: u32) {…}

    _mm_hadd_epi16 ¶

    _mm_hadd_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_hadd_epi32 ¶

    _mm_hadd_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_hadd_pd ¶

    _mm_hadd_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_hadd_ps ¶

    _mm_hadd_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_hadds_epi16 ¶

    _mm_hadds_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_hsub_epi16 ¶

    _mm_hsub_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_hsub_epi32 ¶

    _mm_hsub_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_hsub_pd ¶

    _mm_hsub_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_hsub_ps ¶

    _mm_hsub_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_hsubs_epi16 ¶

    _mm_hsubs_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_insert_epi16 ¶

    _mm_insert_epi16 :: proc "c" (a: #simd[2]i64, i: i32, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_insert_epi32 ¶

    _mm_insert_epi32 :: proc "c" (a: #simd[2]i64, i: i32, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_insert_epi64 ¶

    _mm_insert_epi64 :: proc "c" (a: #simd[2]i64, i: i64, $IMM1: u32) -> #simd[2]i64 {…}

    _mm_insert_epi8 ¶

    _mm_insert_epi8 :: proc "c" (a: #simd[2]i64, i: i32, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_insert_ps ¶

    _mm_insert_ps :: proc "c" (a, b: #simd[4]f32, $IMM8: u8) -> #simd[4]f32 {…}

    _mm_lddqu_si128 ¶

    _mm_lddqu_si128 :: proc "c" (mem_addr: ^#simd[2]i64) -> #simd[2]i64 {…}

    _mm_lfence ¶

    _mm_lfence :: proc "c" () {…}

    _mm_load1_pd ¶

    _mm_load1_pd :: proc "c" (mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_load1_ps ¶

    _mm_load1_ps :: proc "c" (p: ^f32) -> #simd[4]f32 {…}

    _mm_load_pd ¶

    _mm_load_pd :: proc "c" (mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_load_pd1 ¶

    _mm_load_pd1 :: proc "c" (mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_load_ps ¶

    _mm_load_ps :: proc "c" (p: [^]f32) -> #simd[4]f32 {…}

    _mm_load_sd ¶

    _mm_load_sd :: proc "c" (mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_load_si128 ¶

    _mm_load_si128 :: proc "c" (mem_addr: ^#simd[2]i64) -> #simd[2]i64 {…}

    _mm_load_ss ¶

    _mm_load_ss :: proc "c" (p: ^f32) -> #simd[4]f32 {…}

    _mm_loaddup_pd ¶

    _mm_loaddup_pd :: proc "c" (mem_addr: [^]f64) -> #simd[2]f64 {…}

    _mm_loadh_pd ¶

    _mm_loadh_pd :: proc "c" (a: #simd[2]f64, mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_loadl_epi64 ¶

    _mm_loadl_epi64 :: proc "c" (mem_addr: ^#simd[2]i64) -> #simd[2]i64 {…}

    _mm_loadl_pd ¶

    _mm_loadl_pd :: proc "c" (a: #simd[2]f64, mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_loadr_pd ¶

    _mm_loadr_pd :: proc "c" (mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_loadr_ps ¶

    _mm_loadr_ps :: proc "c" (p: [^]f32) -> #simd[4]f32 {…}

    _mm_loadu_pd ¶

    _mm_loadu_pd :: proc "c" (mem_addr: ^f64) -> #simd[2]f64 {…}

    _mm_loadu_ps ¶

    _mm_loadu_ps :: proc "c" (p: [^]f32) -> #simd[4]f32 {…}

    _mm_loadu_si128 ¶

    _mm_loadu_si128 :: proc "c" (mem_addr: ^#simd[2]i64) -> #simd[2]i64 {…}

    _mm_loadu_si64 ¶

    _mm_loadu_si64 :: proc "c" (mem_addr: rawptr) -> #simd[2]i64 {…}

    _mm_madd_epi16 ¶

    _mm_madd_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_maddubs_epi16 ¶

    _mm_maddubs_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_maskload_pd ¶

    _mm_maskload_pd :: proc "c" (mem_addr: ^f64, mask: #simd[2]i64) -> #simd[2]f64 {…}
     

    Loads packed double-precision (64-bit) floating-point elements from memory into the result using mask (an element is zeroed out when the high bit of the corresponding mask element is not set).

    _mm_maskload_ps ¶

    _mm_maskload_ps :: proc "c" (mem_addr: ^f32, mask: #simd[2]i64) -> #simd[4]f32 {…}
     

    Loads packed single-precision (32-bit) floating-point elements from memory into the result using mask (an element is zeroed out when the high bit of the corresponding mask element is not set).

    _mm_maskmoveu_si128 ¶

    _mm_maskmoveu_si128 :: proc "c" (a, mask: #simd[2]i64, mem_addr: rawptr) {…}

    _mm_maskstore_pd ¶

    _mm_maskstore_pd :: proc "c" (mem_addr: ^f64, mask: #simd[2]i64, a: #simd[2]f64) {…}
     

    Stores packed double-precision (64-bit) floating-point elements from a into memory using mask.

    _mm_maskstore_ps ¶

    _mm_maskstore_ps :: proc "c" (mem_addr: ^f32, mask: #simd[2]i64, a: #simd[4]f32) {…}
     

    Stores packed single-precision (32-bit) floating-point elements from a into memory using mask.

    _mm_max_epi16 ¶

    _mm_max_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_max_epi32 ¶

    _mm_max_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_max_epi8 ¶

    _mm_max_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_max_epu16 ¶

    _mm_max_epu16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_max_epu32 ¶

    _mm_max_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_max_epu8 ¶

    _mm_max_epu8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_max_pd ¶

    _mm_max_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_max_ps ¶

    _mm_max_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_max_sd ¶

    _mm_max_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_max_ss ¶

    _mm_max_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_mfence ¶

    _mm_mfence :: proc "c" () {…}

    _mm_min_epi16 ¶

    _mm_min_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_min_epi32 ¶

    _mm_min_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_min_epi8 ¶

    _mm_min_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_min_epu16 ¶

    _mm_min_epu16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_min_epu32 ¶

    _mm_min_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_min_epu8 ¶

    _mm_min_epu8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_min_pd ¶

    _mm_min_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_min_ps ¶

    _mm_min_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_min_sd ¶

    _mm_min_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_min_ss ¶

    _mm_min_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_minpos_epu16 ¶

    _mm_minpos_epu16 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_move_epi64 ¶

    _mm_move_epi64 :: proc "c" (a: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_move_sd ¶

    _mm_move_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_move_ss ¶

    _mm_move_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_movedup_pd ¶

    _mm_movedup_pd :: proc "c" (a: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_movehdup_ps ¶

    _mm_movehdup_ps :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_movehl_ps ¶

    _mm_movehl_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_moveldup_ps ¶

    _mm_moveldup_ps :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_movelh_ps ¶

    _mm_movelh_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_movemask_epi8 ¶

    _mm_movemask_epi8 :: proc "c" (a: #simd[2]i64) -> i32 {…}

    _mm_movemask_pd ¶

    _mm_movemask_pd :: proc "c" (a: #simd[2]f64) -> i32 {…}

    _mm_movemask_ps ¶

    _mm_movemask_ps :: proc "c" (a: #simd[4]f32) -> u32 {…}

    _mm_mpsadbw_epu8 ¶

    _mm_mpsadbw_epu8 :: proc "c" (a: #simd[2]i64, b: #simd[2]i64, $IMM8: i8) -> #simd[2]i64 {…}

    _mm_mul_epi32 ¶

    _mm_mul_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_mul_epu32 ¶

    _mm_mul_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_mul_pd ¶

    _mm_mul_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_mul_ps ¶

    _mm_mul_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_mul_sd ¶

    _mm_mul_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_mul_ss ¶

    _mm_mul_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_mulhi_epi16 ¶

    _mm_mulhi_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_mulhi_epu16 ¶

    _mm_mulhi_epu16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_mulhrs_epi16 ¶

    _mm_mulhrs_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_mullo_epi16 ¶

    _mm_mullo_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_mullo_epi32 ¶

    _mm_mullo_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_or_pd ¶

    _mm_or_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_or_ps ¶

    _mm_or_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_or_si128 ¶

    _mm_or_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_packs_epi16 ¶

    _mm_packs_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_packs_epi32 ¶

    _mm_packs_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_packus_epi16 ¶

    _mm_packus_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_packus_epi32 ¶

    _mm_packus_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_pause ¶

    _mm_pause :: proc "c" () {…}

    _mm_permute_pd ¶

    _mm_permute_pd :: proc "c" (a: #simd[2]f64, $IMM2: u8) -> #simd[2]f64 {…}
     

    Shuffles double-precision (64-bit) floating-point elements in a using the control in IMM2.

    _mm_permute_ps ¶

    _mm_permute_ps :: proc "c" (a: #simd[4]f32, $IMM8: u32) -> #simd[4]f32 {…}
     

    Shuffles single-precision (32-bit) floating-point elements in a using the control in IMM8.

    _mm_permutevar_pd ¶

    _mm_permutevar_pd :: proc "c" (a: #simd[2]f64, b: #simd[2]i64) -> #simd[2]f64 {…}
     

    Shuffles double-precision (64-bit) floating-point elements in a using the control in b.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_pd)

    _mm_permutevar_ps ¶

    _mm_permutevar_ps :: proc "c" (a: #simd[4]f32, b: #simd[2]i64) -> #simd[4]f32 {…}
     

    Shuffles single-precision (32-bit) floating-point elements in a using the control in b.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_ps)

    _mm_prefetch ¶

    _mm_prefetch :: proc "c" (p: rawptr, $STRATEGY: u32) {…}

    _mm_rcp_ps ¶

    _mm_rcp_ps :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_rcp_ss ¶

    _mm_rcp_ss :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_round_pd ¶

    _mm_round_pd :: proc "c" (a: #simd[2]f64, $ROUNDING: i32) -> #simd[2]f64 {…}

    _mm_round_ps ¶

    _mm_round_ps :: proc "c" (a: #simd[4]f32, $ROUNDING: i32) -> #simd[4]f32 {…}

    _mm_round_sd ¶

    _mm_round_sd :: proc "c" (a, b: #simd[2]f64, $ROUNDING: i32) -> #simd[2]f64 {…}

    _mm_round_ss ¶

    _mm_round_ss :: proc "c" (a, b: #simd[4]f32, $ROUNDING: i32) -> #simd[4]f32 {…}

    _mm_rsqrt_ps ¶

    _mm_rsqrt_ps :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_rsqrt_ss ¶

    _mm_rsqrt_ss :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_sad_epu8 ¶

    _mm_sad_epu8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_set1_epi16 ¶

    _mm_set1_epi16 :: proc "c" (a: i16) -> #simd[2]i64 {…}

    _mm_set1_epi32 ¶

    _mm_set1_epi32 :: proc "c" (a: i32) -> #simd[2]i64 {…}

    _mm_set1_epi64x ¶

    _mm_set1_epi64x :: proc "c" (a: i64) -> #simd[2]i64 {…}

    _mm_set1_epi8 ¶

    _mm_set1_epi8 :: proc "c" (a: i8) -> #simd[2]i64 {…}

    _mm_set1_pd ¶

    _mm_set1_pd :: proc "c" (a: f64) -> #simd[2]f64 {…}

    _mm_set1_ps ¶

    _mm_set1_ps :: proc "c" (a: f32) -> #simd[4]f32 {…}

    _mm_set_epi16 ¶

    _mm_set_epi16 :: proc "c" (
    	e7, e6, e5, e4, e3, e2, e1, 
    	e0:                         i16, 
    ) -> #simd[2]i64 {…}

    _mm_set_epi32 ¶

    _mm_set_epi32 :: proc "c" (e3, e2, e1, e0: i32) -> #simd[2]i64 {…}

    _mm_set_epi64x ¶

    _mm_set_epi64x :: proc "c" (e1, e0: i64) -> #simd[2]i64 {…}

    _mm_set_epi8 ¶

    _mm_set_epi8 :: proc "c" (
    	e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, 
    	e0:                                                               i8, 
    ) -> #simd[2]i64 {…}

    _mm_set_pd ¶

    _mm_set_pd :: proc "c" (a, b: f64) -> #simd[2]f64 {…}

    _mm_set_pd1 ¶

    _mm_set_pd1 :: proc "c" (a: f64) -> #simd[2]f64 {…}

    _mm_set_ps ¶

    _mm_set_ps :: proc "c" (a, b, c, d: f32) -> #simd[4]f32 {…}

    _mm_set_sd ¶

    _mm_set_sd :: proc "c" (a: f64) -> #simd[2]f64 {…}

    _mm_set_ss ¶

    _mm_set_ss :: proc "c" (a: f32) -> #simd[4]f32 {…}

    _mm_setcsr ¶

    _mm_setcsr :: proc "c" (val: u32) {…}

    _mm_setr_epi16 ¶

    _mm_setr_epi16 :: proc "c" (
    	e7, e6, e5, e4, e3, e2, e1, 
    	e0:                         i16, 
    ) -> #simd[2]i64 {…}

    _mm_setr_epi32 ¶

    _mm_setr_epi32 :: proc "c" (e3, e2, e1, e0: i32) -> #simd[2]i64 {…}

    _mm_setr_epi8 ¶

    _mm_setr_epi8 :: proc "c" (
    	e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, 
    	e0:                                                               i8, 
    ) -> #simd[2]i64 {…}

    _mm_setr_pd ¶

    _mm_setr_pd :: proc "c" (a, b: f64) -> #simd[2]f64 {…}

    _mm_setr_ps ¶

    _mm_setr_ps :: proc "c" (a, b, c, d: f32) -> #simd[4]f32 {…}

    _mm_setzero_pd ¶

    _mm_setzero_pd :: proc "c" () -> #simd[2]f64 {…}

    _mm_setzero_ps ¶

    _mm_setzero_ps :: proc "c" () -> #simd[4]f32 {…}

    _mm_setzero_si128 ¶

    _mm_setzero_si128 :: proc "c" () -> #simd[2]i64 {…}

    _mm_sfence ¶

    _mm_sfence :: proc "c" () {…}

    _mm_sha1msg1_epu32 ¶

    _mm_sha1msg1_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sha1msg2_epu32 ¶

    _mm_sha1msg2_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sha1nexte_epu32 ¶

    _mm_sha1nexte_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sha1rnds4_epu32 ¶

    _mm_sha1rnds4_epu32 :: proc "c" (a, b: #simd[2]i64, $FUNC: u32) -> #simd[2]i64 {…}

    _mm_sha256msg1_epu32 ¶

    _mm_sha256msg1_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sha256msg2_epu32 ¶

    _mm_sha256msg2_epu32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sha256rnds2_epu32 ¶

    _mm_sha256rnds2_epu32 :: proc "c" (a, b, k: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_shuffle_epi32 ¶

    _mm_shuffle_epi32 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_shuffle_epi8 ¶

    _mm_shuffle_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_shuffle_pd ¶

    _mm_shuffle_pd :: proc "c" (a, b: #simd[2]f64, $MASK: u32) -> #simd[2]f64 {…}

    _mm_shuffle_ps ¶

    _mm_shuffle_ps :: proc "c" (a, b: #simd[4]f32, $MASK: u32) -> #simd[4]f32 {…}

    _mm_shufflehi_epi16 ¶

    _mm_shufflehi_epi16 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_shufflelo_epi16 ¶

    _mm_shufflelo_epi16 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_sign_epi16 ¶

    _mm_sign_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sign_epi32 ¶

    _mm_sign_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sign_epi8 ¶

    _mm_sign_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sll_epi16 ¶

    _mm_sll_epi16 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sll_epi32 ¶

    _mm_sll_epi32 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sll_epi64 ¶

    _mm_sll_epi64 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_slli_epi16 ¶

    _mm_slli_epi16 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_slli_epi32 ¶

    _mm_slli_epi32 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_slli_epi64 ¶

    _mm_slli_epi64 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_slli_si128 ¶

    _mm_slli_si128 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_sqrt_pd ¶

    _mm_sqrt_pd :: proc "c" (a: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_sqrt_ps ¶

    _mm_sqrt_ps :: proc "c" (a: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_sqrt_sd ¶

    _mm_sqrt_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_sqrt_ss ¶

    _mm_sqrt_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_sra_epi16 ¶

    _mm_sra_epi16 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sra_epi32 ¶

    _mm_sra_epi32 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_srai_epi16 ¶

    _mm_srai_epi16 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_srai_epi32 ¶

    _mm_srai_epi32 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_srl_epi16 ¶

    _mm_srl_epi16 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_srl_epi32 ¶

    _mm_srl_epi32 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_srl_epi64 ¶

    _mm_srl_epi64 :: proc "c" (a, count: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_srli_epi16 ¶

    _mm_srli_epi16 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_srli_epi32 ¶

    _mm_srli_epi32 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_srli_epi64 ¶

    _mm_srli_epi64 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_srli_si128 ¶

    _mm_srli_si128 :: proc "c" (a: #simd[2]i64, $IMM8: u32) -> #simd[2]i64 {…}

    _mm_store1_pd ¶

    _mm_store1_pd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_store1_ps ¶

    _mm_store1_ps :: proc "c" (p: [^]f32, a: #simd[4]f32) {…}

    _mm_store_pd ¶

    _mm_store_pd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_store_pd1 ¶

    _mm_store_pd1 :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_store_ps ¶

    _mm_store_ps :: proc "c" (p: [^]f32, a: #simd[4]f32) {…}

    _mm_store_sd ¶

    _mm_store_sd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_store_si128 ¶

    _mm_store_si128 :: proc "c" (mem_addr: ^#simd[2]i64, a: #simd[2]i64) {…}

    _mm_store_ss ¶

    _mm_store_ss :: proc "c" (p: ^f32, a: #simd[4]f32) {…}

    _mm_storeh_pd ¶

    _mm_storeh_pd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_storel_epi64 ¶

    _mm_storel_epi64 :: proc "c" (mem_addr: ^#simd[2]i64, a: #simd[2]i64) {…}

    _mm_storel_pd ¶

    _mm_storel_pd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_storer_pd ¶

    _mm_storer_pd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_storer_ps ¶

    _mm_storer_ps :: proc "c" (p: [^]f32, a: #simd[4]f32) {…}

    _mm_storeu_pd ¶

    _mm_storeu_pd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_storeu_ps ¶

    _mm_storeu_ps :: proc "c" (p: [^]f32, a: #simd[4]f32) {…}

    _mm_storeu_si128 ¶

    _mm_storeu_si128 :: proc "c" (mem_addr: ^#simd[2]i64, a: #simd[2]i64) {…}

    _mm_stream_pd ¶

    _mm_stream_pd :: proc "c" (mem_addr: ^f64, a: #simd[2]f64) {…}

    _mm_stream_ps ¶

    _mm_stream_ps :: proc "c" (addr: [^]f32, a: #simd[4]f32) {…}

    _mm_stream_si128 ¶

    _mm_stream_si128 :: proc "c" (mem_addr: ^#simd[2]i64, a: #simd[2]i64) {…}

    _mm_stream_si32 ¶

    _mm_stream_si32 :: proc "c" (mem_addr: ^i32, a: i32) {…}

    _mm_stream_si64 ¶

    _mm_stream_si64 :: proc "c" (mem_addr: ^i64, a: i64) {…}

    _mm_sub_epi16 ¶

    _mm_sub_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sub_epi32 ¶

    _mm_sub_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sub_epi64 ¶

    _mm_sub_epi64 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sub_epi8 ¶

    _mm_sub_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_sub_pd ¶

    _mm_sub_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_sub_ps ¶

    _mm_sub_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_sub_sd ¶

    _mm_sub_sd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_sub_ss ¶

    _mm_sub_ss :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_subs_epi16 ¶

    _mm_subs_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_subs_epi8 ¶

    _mm_subs_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_subs_epu16 ¶

    _mm_subs_epu16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_subs_epu8 ¶

    _mm_subs_epu8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_test_all_ones ¶

    _mm_test_all_ones :: proc "c" (a: #simd[2]i64) -> i32 {…}

    _mm_test_all_zeros ¶

    _mm_test_all_zeros :: proc "c" (a: #simd[2]i64, mask: #simd[2]i64) -> i32 {…}

    _mm_test_mix_ones_zeros ¶

    _mm_test_mix_ones_zeros :: proc "c" (a: #simd[2]i64, mask: #simd[2]i64) -> i32 {…}

    _mm_testc_pd ¶

    _mm_testc_pd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}
     

    Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in a and b, producing an intermediate 128-bit value, and set ZF to 1 if the sign bit of each 64-bit element in the intermediate value is zero; otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 64-bit element in the intermediate value is zero; otherwise set CF to 0. Return the CF value.

    _mm_testc_ps ¶

    _mm_testc_ps :: proc "c" (a, b: #simd[4]f32) -> i32 {…}
     

    Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in a and b, producing an intermediate 128-bit value, and set ZF to 1 if the sign bit of each 32-bit element in the intermediate value is zero; otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 32-bit element in the intermediate value is zero; otherwise set CF to 0. Return the CF value.

    _mm_testc_si128 ¶

    _mm_testc_si128 :: proc "c" (a: #simd[2]i64, mask: #simd[2]i64) -> i32 {…}

    _mm_testnzc_pd ¶

    _mm_testnzc_pd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}
     

    Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in a and b, producing an intermediate 128-bit value, and set ZF to 1 if the sign bit of each 64-bit element in the intermediate value is zero; otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 64-bit element in the intermediate value is zero; otherwise set CF to 0. Return 1 if both the ZF and CF values are zero; otherwise return 0.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_pd)

    _mm_testnzc_ps ¶

    _mm_testnzc_ps :: proc "c" (a, b: #simd[4]f32) -> i32 {…}
     

    Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in a and b, producing an intermediate 128-bit value, and set ZF to 1 if the sign bit of each 32-bit element in the intermediate value is zero; otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 32-bit element in the intermediate value is zero; otherwise set CF to 0. Return 1 if both the ZF and CF values are zero; otherwise return 0.

    [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_ps)

    _mm_testnzc_si128 ¶

    _mm_testnzc_si128 :: proc "c" (a: #simd[2]i64, mask: #simd[2]i64) -> i32 {…}

    _mm_testz_pd ¶

    _mm_testz_pd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}
     

    Compute the bitwise AND of 128 bits (representing double-precision (64-bit) floating-point elements) in a and b, producing an intermediate 128-bit value, and set ZF to 1 if the sign bit of each 64-bit element in the intermediate value is zero; otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 64-bit element in the intermediate value is zero; otherwise set CF to 0. Return the ZF value.

    _mm_testz_ps ¶

    _mm_testz_ps :: proc "c" (a, b: #simd[4]f32) -> i32 {…}
     

    Compute the bitwise AND of 128 bits (representing single-precision (32-bit) floating-point elements) in a and b, producing an intermediate 128-bit value, and set ZF to 1 if the sign bit of each 32-bit element in the intermediate value is zero; otherwise set ZF to 0. Compute the bitwise NOT of a and then AND with b, producing an intermediate value, and set CF to 1 if the sign bit of each 32-bit element in the intermediate value is zero; otherwise set CF to 0. Return the ZF value.

    _mm_testz_si128 ¶

    _mm_testz_si128 :: proc "c" (a: #simd[2]i64, mask: #simd[2]i64) -> i32 {…}

    _mm_ucomieq_sd ¶

    _mm_ucomieq_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_ucomieq_ss ¶

    _mm_ucomieq_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_ucomige_sd ¶

    _mm_ucomige_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_ucomige_ss ¶

    _mm_ucomige_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_ucomigt_sd ¶

    _mm_ucomigt_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_ucomigt_ss ¶

    _mm_ucomigt_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_ucomile_sd ¶

    _mm_ucomile_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_ucomile_ss ¶

    _mm_ucomile_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_ucomilt_sd ¶

    _mm_ucomilt_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_ucomilt_ss ¶

    _mm_ucomilt_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_ucomineq_sd ¶

    _mm_ucomineq_sd :: proc "c" (a, b: #simd[2]f64) -> i32 {…}

    _mm_ucomineq_ss ¶

    _mm_ucomineq_ss :: proc "c" (a, b: #simd[4]f32) -> b32 {…}

    _mm_undefined_pd ¶

    _mm_undefined_pd :: proc "c" () -> #simd[2]f64 {…}

    _mm_undefined_ps ¶

    _mm_undefined_ps :: proc "c" () -> #simd[4]f32 {…}

    _mm_undefined_si128 ¶

    _mm_undefined_si128 :: proc "c" () -> #simd[2]i64 {…}

    _mm_unpackhi_epi16 ¶

    _mm_unpackhi_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpackhi_epi32 ¶

    _mm_unpackhi_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpackhi_epi64 ¶

    _mm_unpackhi_epi64 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpackhi_epi8 ¶

    _mm_unpackhi_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpackhi_pd ¶

    _mm_unpackhi_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_unpackhi_ps ¶

    _mm_unpackhi_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_unpacklo_epi16 ¶

    _mm_unpacklo_epi16 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpacklo_epi32 ¶

    _mm_unpacklo_epi32 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpacklo_epi64 ¶

    _mm_unpacklo_epi64 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpacklo_epi8 ¶

    _mm_unpacklo_epi8 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _mm_unpacklo_pd ¶

    _mm_unpacklo_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_unpacklo_ps ¶

    _mm_unpacklo_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_xor_pd ¶

    _mm_xor_pd :: proc "c" (a, b: #simd[2]f64) -> #simd[2]f64 {…}

    _mm_xor_ps ¶

    _mm_xor_ps :: proc "c" (a, b: #simd[4]f32) -> #simd[4]f32 {…}

    _mm_xor_si128 ¶

    _mm_xor_si128 :: proc "c" (a, b: #simd[2]i64) -> #simd[2]i64 {…}

    _pdep_u32 ¶

    _pdep_u32 :: proc "c" (a, mask: u32) -> u32 {…}

    _pdep_u64 ¶

    _pdep_u64 :: proc "c" (a, mask: u64) -> u64 {…}

    _pext_u32 ¶

    _pext_u32 :: proc "c" (a, mask: u32) -> u32 {…}

    _pext_u64 ¶

    _pext_u64 :: proc "c" (a, mask: u64) -> u64 {…}

    _popcnt32 ¶

    _popcnt32 :: proc "c" (x: u32) -> i32 {…}

    _popcnt64 ¶

    _popcnt64 :: proc "c" (x: u64) -> i32 {…}

    _rdtsc ¶

    _rdtsc :: proc "c" () -> u64 {…}

    _subborrow_u32 ¶

    _subborrow_u32 :: proc "c" (c_in: u8, a: u32, b: u32, out: ^u32) -> u8 {…}

    _subborrow_u64 ¶

    _subborrow_u64 :: proc "c" (c_in: u8, a: u64, b: u64, out: ^u64) -> u8 {…}

    _tzcnt_u16 ¶

    _tzcnt_u16 :: proc "c" (a: u16) -> u16 {…}

    _tzcnt_u32 ¶

    _tzcnt_u32 :: proc "c" (a: u32) -> u32 {…}

    _tzcnt_u64 ¶

    _tzcnt_u64 :: proc "c" (a: u64) -> u64 {…}

    cmpxchg16b ¶

    cmpxchg16b :: proc "c" (dst: ^u128, old, new: u128, $success, $failure: .Atomic_Memory_Order) -> (val: u128) {…}

    Procedure Groups

    This section is empty.

    Source Files

    Generation Information

    Generated with odin version dev-2026-04 (vendor "odin") Windows_amd64 @ 2026-04-21 21:38:43.467742800 +0000 UTC