diff options
author | Lynne <dev@lynne.ee> | 2022-11-19 00:47:45 +0100 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2022-11-24 15:58:34 +0100 |
commit | 87bae6b0189d5cb71b836890078f96a4d1abd277 (patch) | |
tree | 83f30f5861f5d94cbef297540d8c6e4b96ab8366 /libavutil/x86 | |
parent | 1c8d77a2bfa239621b63c4553c6221560b1ee298 (diff) | |
download | ffmpeg-87bae6b0189d5cb71b836890078f96a4d1abd277.tar.gz |
lavu/tx: refactor to explicitly track and convert lookup table order
Necessary for generalizing PFAs.
Diffstat (limited to 'libavutil/x86')
-rw-r--r-- | libavutil/x86/tx_float_init.c | 46 |
1 file changed, 25 insertions, 21 deletions
diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c index 97ee44defa..d3c0beb50f 100644 --- a/libavutil/x86/tx_float_init.c +++ b/libavutil/x86/tx_float_init.c @@ -75,12 +75,11 @@ static av_cold int b ##basis## _i ##interleave(AVTXContext *s, \ int len, int inv, \ const void *scale) \ { \ - const int inv_lookup = opts ? opts->invert_lookup : 1; \ ff_tx_init_tabs_float(len); \ if (cd->max_len == 2) \ - return ff_tx_gen_ptwo_revtab(s, inv_lookup); \ + return ff_tx_gen_ptwo_revtab(s, opts); \ else \ - return ff_tx_gen_split_radix_parity_revtab(s, len, inv, inv_lookup, \ + return ff_tx_gen_split_radix_parity_revtab(s, len, inv, opts, \ basis, interleave); \ } @@ -91,27 +90,27 @@ static av_cold int factor_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale) { + int ret; + + /* The transformations below are performed in the gather domain, + * so override the option and let the infrastructure convert the map + * to SCATTER if needed. */ + FFTXCodeletOptions sub_opts = { .map_dir = FF_TX_MAP_GATHER }; + TX_TAB(ff_tx_init_tabs)(len); - s->map = av_malloc(len*sizeof(s->map)); - s->map[0] = 0; /* DC is always at the start */ - if (inv) /* Reversing the ACs flips the transform direction */ - for (int i = 1; i < len; i++) - s->map[i] = len - i; + if (len == 15) + ret = ff_tx_gen_pfa_input_map(s, &sub_opts, 3, 5); else - for (int i = 1; i < len; i++) - s->map[i] = i; + ret = ff_tx_gen_default_map(s, &sub_opts); + + if (ret < 0) + return ret; if (len == 15) { int cnt = 0, tmp[15]; - /* Our 15-point transform is actually a 5x3 PFA, so embed its input map. 
*/ - memcpy(tmp, s->map, 15*sizeof(*tmp)); - for (int i = 0; i < 5; i++) - for (int j = 0; j < 3; j++) - s->map[i*3 + j] = tmp[(i*3 + j*5) % 15]; - - /* Special 15-point assembly permutation */ + /* Special permutation to simplify loads in the pre-permuted version */ memcpy(tmp, s->map, 15*sizeof(*tmp)); for (int i = 1; i < 15; i += 3) { s->map[cnt] = tmp[i]; @@ -139,7 +138,7 @@ static av_cold int m_inv_init(AVTXContext *s, const FFTXCodelet *cd, int len, int inv, const void *scale) { int ret; - FFTXCodeletOptions sub_opts = { .invert_lookup = 1 }; + FFTXCodeletOptions sub_opts = { .map_dir = FF_TX_MAP_GATHER }; s->scale_d = *((SCALE_TYPE *)scale); s->scale_f = s->scale_d; @@ -177,7 +176,7 @@ static av_cold int fft_pfa_init(AVTXContext *s, { int ret; int sub_len = len / cd->factors[0]; - FFTXCodeletOptions sub_opts = { .invert_lookup = 0 }; + FFTXCodeletOptions sub_opts = { .map_dir = FF_TX_MAP_SCATTER }; flags &= ~FF_TX_OUT_OF_PLACE; /* We want the subtransform to be */ flags |= AV_TX_INPLACE; /* in-place */ @@ -188,13 +187,18 @@ static av_cold int fft_pfa_init(AVTXContext *s, sub_len, inv, scale))) return ret; - if ((ret = ff_tx_gen_compound_mapping(s, cd->factors[0], sub_len))) + if ((ret = ff_tx_gen_compound_mapping(s, opts, s->inv, cd->factors[0], sub_len))) return ret; if (cd->factors[0] == 15) { + int tmp[15]; + + /* Our 15-point transform is also a compound one, so embed its input map */ + TX_EMBED_INPUT_PFA_MAP(s->map, len, 3, 5); + + /* Special permutation to simplify loads in the pre-permuted version */ for (int k = 0; k < s->sub[0].len; k++) { int cnt = 0; - int tmp[15]; memcpy(tmp, &s->map[k*15], 15*sizeof(*tmp)); for (int i = 1; i < 15; i += 3) { s->map[k*15 + cnt] = tmp[i]; |