summaryrefslogtreecommitdiff
path: root/libavutil
diff options
context:
space:
mode:
authorLynne <dev@lynne.ee>2022-11-19 00:58:13 +0100
committerLynne <dev@lynne.ee>2022-11-24 15:58:34 +0100
commit7f019e77586ab62d37856d09071fcdeef880bcd9 (patch)
treec3411d75d9d04af5116a368c42bf1686da96cb55 /libavutil
parent87bae6b0189d5cb71b836890078f96a4d1abd277 (diff)
downloadffmpeg-7f019e77586ab62d37856d09071fcdeef880bcd9.tar.gz
lavu/tx: add length decomposition function
Rather than using a list of lengths supported, this goes a step beyond and uses all registered codelets to come up with a good decomposition.
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/tx.c143
-rw-r--r--libavutil/tx_priv.h8
2 files changed, 151 insertions, 0 deletions
diff --git a/libavutil/tx.c b/libavutil/tx.c
index 8027e983ba..c692239656 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -17,6 +17,7 @@
*/
#include "avassert.h"
+#include "intmath.h"
#include "cpu.h"
#include "qsort.h"
#include "bprint.h"
@@ -395,6 +396,148 @@ static int get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
return prio;
}
+typedef struct FFTXLenDecomp {
+ int len;
+ int len2;
+ int prio;
+ const FFTXCodelet *cd;
+} FFTXLenDecomp;
+
+static int cmp_decomp(FFTXLenDecomp *a, FFTXLenDecomp *b)
+{
+ return FFDIFFSIGN(b->prio, a->prio);
+}
+
+int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
+ int len, int inv)
+{
+ int nb_decomp = 0;
+ FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS];
+ int codelet_list_idx = codelet_list_num;
+
+ const int cpu_flags = av_get_cpu_flags();
+
+ /* Loop through all codelets in all codelet lists to find matches
+ * to the requirements */
+ while (codelet_list_idx--) {
+ const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
+ const FFTXCodelet *cd = NULL;
+
+ while ((cd = *list++)) {
+ int fl = len;
+ int skip = 0, prio;
+ int factors_product = 1, factors_mod = 0;
+
+ if (nb_decomp >= TX_MAX_DECOMPOSITIONS)
+ goto sort;
+
+ /* Check if the type matches */
+ if (cd->type != TX_TYPE_ANY && type != cd->type)
+ continue;
+
+ /* Check direction for non-orthogonal codelets */
+ if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
+ ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+ continue;
+
+ /* Check if the CPU supports the required ISA */
+ if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
+ !(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
+ continue;
+
+ for (int i = 0; i < TX_MAX_FACTORS; i++) {
+ if (!cd->factors[i] || (fl == 1))
+ break;
+
+ if (cd->factors[i] == TX_FACTOR_ANY) {
+ factors_mod++;
+ factors_product *= fl;
+ } else if (!(fl % cd->factors[i])) {
+ factors_mod++;
+ if (cd->factors[i] == 2) {
+ int b = ff_ctz(fl);
+ fl >>= b;
+ factors_product <<= b;
+ } else {
+ do {
+ fl /= cd->factors[i];
+ factors_product *= cd->factors[i];
+ } while (!(fl % cd->factors[i]));
+ }
+ }
+ }
+
+ /* Disqualify if factor requirements are not satisfied or if trivial */
+ if ((factors_mod < cd->nb_factors) || (len == factors_product))
+ continue;
+
+ if (av_gcd(factors_product, fl) != 1)
+ continue;
+
+ /* Check if length is supported and factorization was successful */
+ if ((factors_product < cd->min_len) ||
+ (cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len)))
+ continue;
+
+ prio = get_codelet_prio(cd, cpu_flags, factors_product) * factors_product;
+
+ /* Check for duplicates */
+ for (int i = 0; i < nb_decomp; i++) {
+ if (factors_product == ld[i].len) {
+ /* Update priority if new one is higher */
+ if (prio > ld[i].prio)
+ ld[i].prio = prio;
+ skip = 1;
+ break;
+ }
+ }
+
+ /* Add decomposition if unique */
+ if (!skip) {
+ ld[nb_decomp].cd = cd;
+ ld[nb_decomp].len = factors_product;
+ ld[nb_decomp].len2 = fl;
+ ld[nb_decomp].prio = prio;
+ nb_decomp++;
+ }
+ }
+ }
+
+ if (!nb_decomp)
+ return AVERROR(EINVAL);
+
+sort:
+ AV_QSORT(ld, nb_decomp, FFTXLenDecomp, cmp_decomp);
+
+ for (int i = 0; i < nb_decomp; i++) {
+ if (ld[i].cd->nb_factors > 1)
+ dst[i] = ld[i].len2;
+ else
+ dst[i] = ld[i].len;
+ }
+
+ return nb_decomp;
+}
+
+int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
+{
+ s->map = av_malloc(s->len*sizeof(*s->map));
+ if (!s->map)
+ return AVERROR(ENOMEM);
+
+ s->map[0] = 0; /* DC is always at the start */
+ if (s->inv) /* Reversing the ACs flips the transform direction */
+ for (int i = 1; i < s->len; i++)
+ s->map[i] = s->len - i;
+ else
+ for (int i = 1; i < s->len; i++)
+ s->map[i] = i;
+
+ s->map_dir = FF_TX_MAP_GATHER;
+
+ return 0;
+}
+
#if !CONFIG_SMALL
static void print_flags(AVBPrint *bp, uint64_t f)
{
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index 207f79dfb8..f11061d051 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -183,6 +183,9 @@ typedef struct FFTXCodeletOptions {
/* Maximum amount of subtransform functions, subtransforms and factors. Arbitrary. */
#define TX_MAX_SUB 4
+/* Maximum number of returned results for ff_tx_decompose_length. Arbitrary. */
+#define TX_MAX_DECOMPOSITIONS 512
+
typedef struct FFTXCodelet {
const char *name; /* Codelet name, for debugging */
av_tx_fn function; /* Codelet function, != NULL */
@@ -284,6 +287,11 @@ int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
/* Clear the context by freeing all tables, maps and subtransforms. */
void ff_tx_clear_ctx(AVTXContext *s);
+/* Attempt to factorize a length into 2 integers such that
+ * len / dst1 == dst2, where dst1 and dst2 are coprime. */
+int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
+ int len, int inv);
+
/* Generate a default map (0->len or 0, (len-1)->1 for inverse transforms)
* for a context. */
int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts);