Exercice
Maintenant que vous vous êtes familiarisé avec les instructions AVX, pouvez-vous deviner ce que réalise le code AVX512 suivant :
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <immintrin.h>
/*
 * View of a 128-bit SIMD register: the same 16 bytes can be accessed either
 * as a single __m128i or as arrays of 64-, 32-, 16- or 8-bit unsigned
 * elements.
 *
 * NOTE(review): identifiers starting with a double underscore are reserved
 * for the implementation in C; the name is kept because the rest of the
 * file uses it.
 */
typedef union
{
__m128i vec128;
uint64_t vec64[2];
uint32_t vec32[4];
uint16_t vec16[8];
uint8_t vec8[16];
} __reg128;
/*
 * View of a 256-bit SIMD register: the same 32 bytes can be accessed as a
 * single __m256i, as two __m128i halves, or as arrays of 64-, 32-, 16- or
 * 8-bit unsigned elements.
 *
 * NOTE(review): the double-underscore name is a reserved identifier in C;
 * it is kept because the rest of the file uses it.
 */
typedef union
{
__m256i vec256;
__m128i vec128[2];
uint64_t vec64[4];
uint32_t vec32[8];
uint16_t vec16[16];
uint8_t vec8[32];
} __reg256;
/*
 * View of a 512-bit SIMD register: the same 64 bytes can be accessed as a
 * single __m512i, as two __m256i halves, as four __m128i lanes, or as
 * arrays of 64-, 32-, 16- or 8-bit unsigned elements.
 *
 * NOTE(review): the double-underscore name is a reserved identifier in C;
 * it is kept because the rest of the file uses it.
 */
typedef union
{
__m512i vec512;
__m256i vec256[2];
__m128i vec128[4];
uint64_t vec64[8];
uint32_t vec32[16];
uint16_t vec16[32];
uint8_t vec8[64];
} __reg512;
/*
 * Print the contents of a 128-bit register on stdout as "<s>=(...)".
 *
 * s    : label written before the '=' sign.
 * mode : width in bits of the elements the register is split into;
 *        supported values are 8, 16, 32 and 64.
 * A    : register to display (passed by value).
 *
 * Elements are written in hexadecimal, from index 0 upwards, each one
 * between single quotes. For modes 8, 16 and 32 the values are space-padded
 * to 2, 4 and 8 characters; for mode 64 they are prefixed with "0x".
 * Any other mode writes nothing on stdout and reports the problem on stderr.
 */
void print128(const char *s, int mode, __reg128 A)
{
    switch (mode) {
    case 8:
        printf("%s=(", s);
        for (int i = 0; i < 15; i++) {
            printf("'%2x',", (unsigned)A.vec8[i]);
        }
        printf("'%2x')\n", (unsigned)A.vec8[15]);
        break;

    case 16:
        printf("%s=(", s);
        for (int i = 0; i < 7; i++) {
            printf("'%4x',", (unsigned)A.vec16[i]);
        }
        printf("'%4x')\n", (unsigned)A.vec16[7]);
        break;

    case 32:
        printf("%s=(", s);
        for (int i = 0; i < 3; i++) {
            printf("'%8x',", (unsigned)A.vec32[i]);
        }
        printf("'%8x')\n", (unsigned)A.vec32[3]);
        break;

    case 64:
        /*
         * uint64_t is not necessarily unsigned long, so "%lx" is not a
         * portable match; convert explicitly and use "%llx".
         */
        printf("%s=('0x%llx','0x%llx')\n", s,
               (unsigned long long)A.vec64[0],
               (unsigned long long)A.vec64[1]);
        break;

    default:
        fprintf(stderr,
                "print128: unsupported mode %d (expected 8, 16, 32 or 64)\n",
                mode);
        break;
    }
}
/*
 * Carry-less (polynomial over GF(2)) multiplication of two 128-bit values,
 * producing the full 256-bit product.
 *
 * C : output; receives the low 128 bits of the product in vec128[0] and
 *     the high 128 bits in vec128[1].
 * A : first operand  (vec64[0] = low half, vec64[1] = high half).
 * B : second operand (same layout).
 *
 * A single 512-bit carry-less multiply computes the four 64x64 partial
 * products at once, one per 128-bit lane. The two cross products are then
 * XORed together and folded into the middle 128 bits of the result.
 */
void mystere(__reg256 *C, __reg128 A, __reg128 B)
{
    const uint64_t a_lo = A.vec64[0], a_hi = A.vec64[1];
    const uint64_t b_lo = B.vec64[0], b_hi = B.vec64[1];

    /*
     * _mm512_set_epi64 takes its arguments from the highest element down to
     * the lowest, so the low qword of each 128-bit lane holds:
     *   lane 0: a_lo, b_lo    lane 1: a_hi, b_hi
     *   lane 2: a_hi, b_lo    lane 3: a_lo, b_hi
     */
    __reg512 lhs, rhs, prod;
    lhs.vec512 = _mm512_set_epi64(0, a_lo, 0, a_hi, 0, a_hi, 0, a_lo);
    rhs.vec512 = _mm512_set_epi64(0, b_hi, 0, b_lo, 0, b_hi, 0, b_lo);

    /* Selector 0: multiply the low qwords of both operands in every lane. */
    prod.vec512 = _mm512_clmulepi64_epi128(lhs.vec512, rhs.vec512, 0);

    /* Sum (XOR) of the two cross terms a_lo*b_hi and a_hi*b_lo. */
    __reg128 cross;
    cross.vec128 = _mm_xor_si128(prod.vec128[3], prod.vec128[2]);

    /* The cross terms are weighted by x^64: fold them into qwords 1 and 2. */
    prod.vec64[1] ^= cross.vec64[0];
    prod.vec64[2] ^= cross.vec64[1];

    C->vec128[0] = prod.vec128[0];
    C->vec128[1] = prod.vec128[1];
}
int main(void)
{
__reg128 A, B;
__reg256 *C = calloc(1,sizeof(__reg256));
A.vec128 = _mm_set_epi64x(0xfffabfffeeffffff,0xffffaa1256ee1234);
B.vec128 = _mm_set_epi64x(0xbfeefffdffffffff,0xea0d362010800099);
printf("Données : \n");
printf("A0=0x%lx\n", A.vec64[0]);
printf("A1=0x%lx\n", A.vec64[1]);
printf("B0=0x%lx\n", B.vec64[0]);
printf("B1=0x%lx\n\n", B.vec64[1]);
mystere(C, A, B);
printf("Resultat : \n");
print128("C0",64,(__reg128)(C->vec128[0]));
print128("C1",64,(__reg128)(C->vec128[1]));
}