Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save tejainece/3ad714e10bab7dbce3ed829d07e7f50a to your computer and use it in GitHub Desktop.
Save tejainece/3ad714e10bab7dbce3ed829d07e7f50a to your computer and use it in GitHub Desktop.
Experimental dig at cross-platform SIMD
// Forward declaration of the Simd class template; its definition is not part
// of this section of the file.
template <typename T> class Simd;
#if defined(TC_ARCH_X86)
#include <bits/stdc++.h>
#include <x86intrin.h>
// SIMD register width, in bits, that Register uses to pick its active vector
// member. Starts at 128 and is overwritten by initializeSimd() with the value
// reported by detectSimdSize().
uint16_t simdSize = 128;
// Returns the SIMD width, in bits, selected from the host CPU's feature flags:
// 512 when "avx512f" is reported, otherwise 256 when "avx" is reported,
// otherwise 128.
uint16_t detectSimdSize() {
  // Probe the widest extension first so it takes precedence.
  if (__builtin_cpu_supports("avx512f")) {
    return 512;
  }
  return __builtin_cpu_supports("avx") ? 256 : 128;
}
// Queries the host CPU via detectSimdSize() and stores the resulting width
// (in bits) in the global simdSize.
void initializeSimd() {
  const uint16_t detectedBits = detectSimdSize();
  simdSize = detectedBits;
}
// SIMD register wrapper for element type T whose active width follows the
// global simdSize (512, 256, or anything else treated as 128 bits).
//
// T is combined with the GCC/Clang vector_size attribute, so it must be a
// scalar type that the attribute accepts.
template <typename T> class Register {
public:
  // Overlapping storage for the three supported widths. Only the member that
  // matches simdSize at construction time is written.
  typedef union {
    T data __attribute__((vector_size(16)));
    T data256 __attribute__((vector_size(32)));
    T data512 __attribute__((vector_size(64)));
  } Type;

  Type data;

  // Broadcasts `val` into every lane of the active-width vector.
  explicit Register(T &val) {
    // The vector extensions do not allow assigning a scalar to a vector
    // object, so each lane is written individually.
    if (simdSize == 512) {
      fillLanes(data.data512, val);
    } else if (simdSize == 256) {
      fillLanes(data.data256, val);
    } else {
      fillLanes(data.data, val);
    }
  }

  // Loads size() consecutive elements starting at `val` into the active-width
  // vector. `val` must be non-null and point to at least size() readable
  // elements; no particular alignment is required.
  Register(T *val) {
    // memcpy expresses an unaligned load without depending on any specific
    // instruction-set intrinsic being available at compile time.
    if (simdSize == 512) {
      std::memcpy(&data.data512, val, sizeof(data.data512));
    } else if (simdSize == 256) {
      std::memcpy(&data.data256, val, sizeof(data.data256));
    } else {
      std::memcpy(&data.data, val, sizeof(data.data));
    }
  }

  // Number of T elements that fit in a register of simdSize bits.
  static uint16_t size() { return simdSize / (sizeof(T) * 8); }

private:
  // Writes `val` into every lane of the vector object `lanes`.
  template <typename V> static void fillLanes(V &lanes, const T &val) {
    for (unsigned i = 0; i < sizeof(V) / sizeof(T); ++i) {
      lanes[i] = val;
    }
  }
};
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment