summaryrefslogtreecommitdiff
path: root/libgfortran/generated/matmul_i4.c
diff options
context:
space:
mode:
Diffstat (limited to 'libgfortran/generated/matmul_i4.c')
-rw-r--r--libgfortran/generated/matmul_i4.c38
1 files changed, 38 insertions, 0 deletions
diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c
index 475b214ae79..f8f0cdb96cc 100644
--- a/libgfortran/generated/matmul_i4.c
+++ b/libgfortran/generated/matmul_i4.c
@@ -1734,6 +1734,24 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
#endif /* HAVE_AVX512F */
+/* AMD-specific functions with AVX128 and FMA3/FMA4. */
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
+void /* Declaration only; matmul_i4 selects this variant at run time on AMD CPUs that report both AVX and FMA. */
+matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
+ gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); /* target attribute: compile this function with AVX and FMA enabled */
+internal_proto(matmul_i4_avx128_fma3); /* NOTE(review): presumably restricts the symbol to library-internal use -- confirm against the internal_proto definition */
+#endif /* HAVE_AVX && HAVE_FMA3 && HAVE_AVX128 */
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
+void /* Declaration only; matmul_i4 selects this variant at run time on AMD CPUs that report both AVX and FMA4. */
+matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
+ gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); /* target attribute: compile this function with AVX and FMA4 enabled */
+internal_proto(matmul_i4_avx128_fma4); /* NOTE(review): presumably restricts the symbol to library-internal use -- confirm against the internal_proto definition */
+#endif /* HAVE_AVX && HAVE_FMA4 && HAVE_AVX128 */
+
/* Function to fall back to if there is no special processor-specific version. */
static void
matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
@@ -2332,6 +2350,26 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray,
}
#endif /* HAVE_AVX */
}
+ else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
+ {
+#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ {
+ matmul_fn = matmul_i4_avx128_fma3;
+ goto store;
+ }
+#endif
+#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ {
+ matmul_fn = matmul_i4_avx128_fma4;
+ goto store;
+ }
+#endif
+
+ }
store:
__atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
}