#include <iostream>
#include <numeric>
/*
 * GENERATE(isa) expands to one definition of
 *
 *     void f(float* a, const float* b, float c, int n)
 *
 * tagged with the compiler attributes target(isa) and optimize("Ofast").
 * Each definition first writes the stringified macro argument to std::cout
 * (the argument is a string literal, so the surrounding double quotes are
 * part of the printed text) and then performs a[i] += c * b[i] for every
 * i in [0, n). Both pointers are declared __restrict, so the ranges passed
 * as a and b must not overlap.
 *
 * Only block comments may be used inside the macro body: a line comment
 * would swallow the trailing line-continuation backslash.
 */
#define GENERATE(isa) \
    __attribute__((target(isa), optimize("Ofast"))) \
    void f(float* __restrict a, const float* __restrict b, float c, int n) \
    { \
        std::cout << #isa << std::endl; \
        for (int idx = 0; idx < n; ++idx) { \
            a[idx] = a[idx] + c * b[idx]; \
        } \
    }

// One version of f per instruction-set target. Defining the same function
// several times with different target attributes relies on the compiler's
// function multiversioning support; the "default" version is the one built
// without any extra target requirement.
GENERATE("fma")
GENERATE("avx2")
GENERATE("avx")
GENERATE("sse4.2")
GENERATE("default")
// Driver: runs f once over two 100-element buffers and prints the sum of the
// updated destination buffer.
//
// All inputs are zero-initialized. Previously a, b and c were left
// uninitialized, so f and std::accumulate read indeterminate values, which is
// undefined behaviour and made the printed sum meaningless.
int main() {
	constexpr int kCount = 100;  // number of elements processed by f

	float a[kCount] = {};  // destination, updated in place by f
	float b[kCount] = {};  // source operand
	float c = 0.0f;        // scale factor applied to b

	f(a, b, c, kCount);
	std::cout << std::accumulate(a, a + kCount, 0.0f) << std::endl;
}
I2luY2x1ZGUgPGlvc3RyZWFtPgojaW5jbHVkZSA8bnVtZXJpYz4KCiNkZWZpbmUgR0VORVJBVEUocykgX19hdHRyaWJ1dGVfXygodGFyZ2V0KHMpLG9wdGltaXplKCJPZmFzdCIpKSkgXAp2b2lkIGYoZmxvYXQgKiBfX3Jlc3RyaWN0IGEsIGNvbnN0IGZsb2F0ICogX19yZXN0cmljdCBiLCBmbG9hdCBjLCBpbnQgbikgeyBcCglzdGQ6OmNvdXQgPDwgI3MgPDwgc3RkOjplbmRsOyBcCglmb3IgKGludCBpID0gMDsgaSA8IG47IGkrKykgXAoJCWFbaV0gKz0gYyAqIGJbaV07IFwKfQoKR0VORVJBVEUoImZtYSIpCkdFTkVSQVRFKCJhdngyIikKR0VORVJBVEUoImF2eCIpCkdFTkVSQVRFKCJzc2U0LjIiKQpHRU5FUkFURSgiZGVmYXVsdCIpCgoKaW50IG1haW4oKSB7CglmbG9hdCBhWzEwMF0sIGJbMTAwXSwgYzsKCWYoYSwgYiwgYywgMTAwKTsKCXN0ZDo6Y291dCA8PCBzdGQ6OmFjY3VtdWxhdGUoYSwgYSsxMDAsIDAuMGYpIDw8IHN0ZDo6ZW5kbDsKfQ==