/*
 * Copyright (c) 2022 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <math.h>
#include <stdint.h>

#include "libavutil/tx.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/cpu.h"
#include "libavutil/opt.h"
#include "libavutil/parseutils.h"
#include "audio.h"
#include "video.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"

enum FrequencyScale {
    FSCALE_LINEAR,
    FSCALE_LOG2,
    FSCALE_BARK,
    FSCALE_MEL,
    FSCALE_ERBS,
    NB_FSCALE
};

enum DirectionMode {
    DIRECTION_LR,
    DIRECTION_RL,
    DIRECTION_UD,
    DIRECTION_DU,
    NB_DIRECTION
};

enum SlideMode {
    SLIDE_REPLACE,
    SLIDE_SCROLL,
    SLIDE_FRAME,
    NB_SLIDE
};

typedef struct ShowCWTContext {
    const AVClass *class;
    int w, h;
    int mode;
    char *rate_str;
    AVRational auto_frame_rate;
    AVRational frame_rate;
    AVTXContext **fft;
    AVTXContext **ifft;
    av_tx_fn tx_fn;
    av_tx_fn itx_fn;
    int fft_in_size;
    int fft_out_size;
    int ifft_in_size;
    int ifft_out_size;
    int pos;
    int64_t in_pts;
    int64_t old_pts;
    int64_t eof_pts;
    float *frequency_band;
    AVFrame *kernel;
    unsigned *index;
    int *kernel_start;
    int *kernel_stop;
    AVFrame *cache[2];
    AVFrame *outpicref;
    AVFrame *fft_in;
    AVFrame *fft_out;
    AVFrame *ifft_in;
    AVFrame *ifft_out;
    AVFrame *ch_out;
    int nb_threads;
    int nb_channels;
    int nb_consumed_samples;
    int pps;
    int eof;
    int slide;
    int new_frame;
    int direction;
    int hop_size;
    int hop_index;
    int ihop_size;
    int ihop_index;
    int input_padding_size;
    int input_sample_count;
    int output_padding_size;
    int output_sample_count;
    int frequency_band_count;
    float logarithmic_basis;
    int frequency_scale;
    float minimum_frequency;
    float maximum_frequency;
    float deviation;
} ShowCWTContext;

#define OFFSET(x) offsetof(ShowCWTContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

static const AVOption showcwt_options[] = {
    { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
    { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
    { "rate", "set video rate", OFFSET(rate_str), AV_OPT_TYPE_STRING, {.str = "25"}, 0, 0, FLAGS },
    { "r",    "set video rate", OFFSET(rate_str), AV_OPT_TYPE_STRING, {.str = "25"}, 0, 0, FLAGS },
    { "scale", "set frequency scale", OFFSET(frequency_scale), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_FSCALE-1, FLAGS, "scale" },
    {  "linear", "linear",      0, AV_OPT_TYPE_CONST,{.i64=FSCALE_LINEAR}, 0, 0, FLAGS, "scale" },
    {  "log2",   "logarithmic", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_LOG2},   0, 0, FLAGS, "scale" },
    {  "bark",   "bark",        0, AV_OPT_TYPE_CONST,{.i64=FSCALE_BARK},   0, 0, FLAGS, "scale" },
    {  "mel",    "mel",         0, AV_OPT_TYPE_CONST,{.i64=FSCALE_MEL},    0, 0, FLAGS, "scale" },
    {  "erbs",   "erbs",        0, AV_OPT_TYPE_CONST,{.i64=FSCALE_ERBS},   0, 0, FLAGS, "scale" },
    { "min",  "set minimum frequency", OFFSET(minimum_frequency), AV_OPT_TYPE_FLOAT, {.dbl = 20.},     1,   2000, FLAGS },
    { "max",  "set maximum frequency", OFFSET(maximum_frequency), AV_OPT_TYPE_FLOAT, {.dbl = 20000.},  0, 192000, FLAGS },
    { "logb", "set logarithmic basis", OFFSET(logarithmic_basis), AV_OPT_TYPE_FLOAT, {.dbl = 0.0001},  0, 1, FLAGS },
    { "deviation", "set frequency deviation", OFFSET(deviation),  AV_OPT_TYPE_FLOAT, {.dbl = 1.},      0, 10, FLAGS },
    { "pps",  "set pixels per second", OFFSET(pps), AV_OPT_TYPE_INT, {.i64 = 64}, 1, 1024, FLAGS },
    { "mode", "set output mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 4, FLAGS, "mode" },
    {  "magnitude", "magnitude",         0, AV_OPT_TYPE_CONST,{.i64=0}, 0, 0, FLAGS, "mode" },
    {  "phase",     "phase",             0, AV_OPT_TYPE_CONST,{.i64=1}, 0, 0, FLAGS, "mode" },
    {  "magphase",  "magnitude+phase",   0, AV_OPT_TYPE_CONST,{.i64=2}, 0, 0, FLAGS, "mode" },
    {  "channel",   "color per channel", 0, AV_OPT_TYPE_CONST,{.i64=3}, 0, 0, FLAGS, "mode" },
    {  "stereo",    "stereo difference", 0, AV_OPT_TYPE_CONST,{.i64=4}, 0, 0, FLAGS, "mode" },
    { "slide", "set slide mode", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_SLIDE-1, FLAGS, "slide" },
    {  "replace", "replace", 0, AV_OPT_TYPE_CONST,{.i64=SLIDE_REPLACE},0, 0, FLAGS, "slide" },
    {  "scroll",  "scroll",  0, AV_OPT_TYPE_CONST,{.i64=SLIDE_SCROLL}, 0, 0, FLAGS, "slide" },
    {  "frame",   "frame",   0, AV_OPT_TYPE_CONST,{.i64=SLIDE_FRAME},  0, 0, FLAGS, "slide" },
    { "direction", "set direction mode", OFFSET(direction), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_DIRECTION-1, FLAGS, "direction" },
    {  "lr", "left to right", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_LR}, 0, 0, FLAGS, "direction" },
    {  "rl", "right to left", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_RL}, 0, 0, FLAGS, "direction" },
    {  "ud", "up to down",    0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_UD}, 0, 0, FLAGS, "direction" },
    {  "du", "down to up",    0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_DU}, 0, 0, FLAGS, "direction" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(showcwt);

static av_cold void uninit(AVFilterContext *ctx)
{
    ShowCWTContext *s = ctx->priv;

    av_freep(&s->frequency_band);
    av_freep(&s->kernel_start);
    av_freep(&s->kernel_stop);
    av_freep(&s->index);

    av_frame_free(&s->kernel);
    av_frame_free(&s->cache[0]);
    av_frame_free(&s->cache[1]);
    av_frame_free(&s->outpicref);
    av_frame_free(&s->fft_in);
    av_frame_free(&s->fft_out);
    av_frame_free(&s->ifft_in);
    av_frame_free(&s->ifft_out);
    av_frame_free(&s->ch_out);

    if (s->fft) {
        for (int n = 0; n < s->nb_threads; n++)
            av_tx_uninit(&s->fft[n]);
        av_freep(&s->fft);
    }

    if (s->ifft) {
        for (int n = 0; n < s->nb_threads; n++)
            av_tx_uninit(&s->ifft[n]);
        av_freep(&s->ifft);
    }
}

static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *formats = NULL;
    AVFilterChannelLayouts *layouts = NULL;
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
                                                   AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
    int ret;

    formats = ff_make_format_list(sample_fmts);
    if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) < 0)
        return ret;

    layouts = ff_all_channel_counts();
    if ((ret = ff_channel_layouts_ref(layouts, &inlink->outcfg.channel_layouts)) < 0)
        return ret;

    formats = ff_all_samplerates();
    if ((ret = ff_formats_ref(formats, &inlink->outcfg.samplerates)) < 0)
        return ret;

    formats = ff_make_format_list(pix_fmts);
    if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) < 0)
        return ret;

    return 0;
}

static void frequency_band(float *frequency_band, int frequency_band_count, float frequency_range,
                           float frequency_offset, int frequency_scale, float deviation)
{
    deviation *= sqrtf(1.f / (4.f * M_PI)); // Heisenberg Gabor Limit

    for (int y = 0; y < frequency_band_count; y++) {
        float frequency = frequency_range * (1.f - (float)y / frequency_band_count) + frequency_offset;
        float frequency_derivative = frequency_range / frequency_band_count;

        switch (frequency_scale) {
        case FSCALE_LOG2:
            frequency = powf(2.f, frequency);
            frequency_derivative *= logf(2.f) * frequency;
            break;
        case FSCALE_BARK:
            frequency = 600.f * sinhf(frequency / 6.f);
            frequency_derivative *= sqrtf(frequency * frequency + 360000.f) / 6.f;
            break;
        case FSCALE_MEL:
            frequency = 700.f * (powf(10.f, frequency / 2595.f) - 1.f);
            frequency_derivative *= (frequency + 700.f) * logf(10.f) / 2595.f;
            break;
        case FSCALE_ERBS:
            frequency = 676170.4f / (47.06538f - expf(frequency * 0.08950404f)) - 14678.49f;
            frequency_derivative *= (frequency * frequency + 14990.4 * frequency + 4577850.f) / 160514.f;
            break;
        }

        frequency_band[y*2  ] = frequency;
        frequency_band[y*2+1] = frequency_derivative * deviation;
    }
}

static float remap_log(float value, float log_factor)
{
    float sign = (0 < value) - (value < 0);

    value = logf(value * sign) * log_factor;

    return 1.f - av_clipf(value, 0.f, 1.f);
}

static int run_channel_cwt_prepare(AVFilterContext *ctx, void *arg, int jobnr, int ch)
{
    ShowCWTContext *s = ctx->priv;
    const int hop_size = s->hop_size;
    AVFrame *fin = arg;
    float *cache0 = (float *)s->cache[0]->extended_data[ch];
    float *cache = (float *)s->cache[1]->extended_data[ch];
    AVComplexFloat *src = (AVComplexFloat *)s->fft_in->extended_data[ch];
    AVComplexFloat *dst = (AVComplexFloat *)s->fft_out->extended_data[ch];

    if (fin) {
        const int offset = s->hop_index;
        const float *input = (const float *)fin->extended_data[ch];

        memcpy(&cache[offset], input, fin->nb_samples * sizeof(float));
    }

    if (fin == NULL) {
        memset(&cache[s->hop_index], 0, (hop_size - s->hop_index) * sizeof(float));
    } else if (s->hop_index + fin->nb_samples < hop_size) {
        return 0;
    }

    for (int n = 0; n < hop_size; n++) {
        src[n].re = cache0[n];
        src[n].im = 0.f;
        src[n + hop_size].re = cache[n];
        src[n + hop_size].im = 0.f;
    }

    s->tx_fn(s->fft[jobnr], dst, src, sizeof(*src));

    return 0;
}

static int draw(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ShowCWTContext *s = ctx->priv;
    const ptrdiff_t ylinesize = s->outpicref->linesize[0];
    const ptrdiff_t ulinesize = s->outpicref->linesize[1];
    const ptrdiff_t vlinesize = s->outpicref->linesize[2];
    const ptrdiff_t alinesize = s->outpicref->linesize[3];
    const float log_factor = 1.f/logf(s->logarithmic_basis);
    const int count = s->frequency_band_count;
    const int start = (count * jobnr) / nb_jobs;
    const int end = (count * (jobnr+1)) / nb_jobs;
    const int ihop_index = s->ihop_index;
    const int ihop_size = s->ihop_size;
    const int direction = s->direction;
    uint8_t *dstY, *dstU, *dstV, *dstA;
    const int mode = s->mode;
    const int w_1 = s->w - 1;
    const int x = s->pos;
    float Y, U, V;

    for (int y = start; y < end; y++) {
        const AVComplexFloat *src = ((const AVComplexFloat *)s->ch_out->extended_data[0]) +
                                    y * ihop_size + ihop_index;

        switch (direction) {
        case DIRECTION_LR:
        case DIRECTION_RL:
            dstY = s->outpicref->data[0] + y * ylinesize;
            dstU = s->outpicref->data[1] + y * ulinesize;
            dstV = s->outpicref->data[2] + y * vlinesize;
            dstA = s->outpicref->data[3] ?
                   s->outpicref->data[3] + y * alinesize : NULL;
            break;
        case DIRECTION_UD:
        case DIRECTION_DU:
            dstY = s->outpicref->data[0] + x * ylinesize + w_1 - y;
            dstU = s->outpicref->data[1] + x * ulinesize + w_1 - y;
            dstV = s->outpicref->data[2] + x * vlinesize + w_1 - y;
            dstA = s->outpicref->data[3] ? s->outpicref->data[3] + x * alinesize + w_1 - y : NULL;
            break;
        }

        switch (s->slide) {
        case SLIDE_REPLACE:
        case SLIDE_FRAME:
            /* nothing to do here */
            break;
        case SLIDE_SCROLL:
            switch (s->direction) {
            case DIRECTION_RL:
                memmove(dstY, dstY + 1, w_1);
                memmove(dstU, dstU + 1, w_1);
                memmove(dstV, dstV + 1, w_1);
                if (dstA != NULL)
                    memmove(dstA, dstA + 1, w_1);
                break;
            case DIRECTION_LR:
                memmove(dstY + 1, dstY, w_1);
                memmove(dstU + 1, dstU, w_1);
                memmove(dstV + 1, dstV, w_1);
                if (dstA != NULL)
                    memmove(dstA + 1, dstA, w_1);
                break;
            }
            break;
        }

        if (direction == DIRECTION_RL || direction == DIRECTION_LR) {
            dstY += x;
            dstU += x;
            dstV += x;
            if (dstA != NULL)
                dstA += x;
        }

        switch (mode) {
        case 4:
            {
                const AVComplexFloat *src2 = ((const AVComplexFloat *)s->ch_out->extended_data[FFMIN(1, s->nb_channels - 1)]) +
                                             y * ihop_size + ihop_index;
                float z, u, v;

                z = hypotf(src[0].re + src2[0].re, src[0].im + src2[0].im);
                u = hypotf(src[0].re, src[0].im);
                v = hypotf(src2[0].re, src2[0].im);

                z = remap_log(z, log_factor);
                u = remap_log(u, log_factor);
                v = remap_log(v, log_factor);

                Y = z;
                U = 0.5f + z * sinf((v - u) * M_PI_2);
                V = 0.5f + z * sinf((u - v) * M_PI_2);

                dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
                dstU[0] = av_clip_uint8(lrintf(U * 255.f));
                dstV[0] = av_clip_uint8(lrintf(V * 255.f));
                if (dstA)
                    dstA[0] = dstY[0];
            }
            break;
        case 3:
            {
                const int nb_channels = s->nb_channels;
                const float yf = 1.f / nb_channels;

                Y = 0.f;
                U = V = 0.5f;
                for (int ch = 0; ch < nb_channels; ch++) {
                    const AVComplexFloat *src = ((const AVComplexFloat *)s->ch_out->extended_data[ch]) +
                                                y * ihop_size + ihop_index;
                    float z;

                    z = hypotf(src[0].re, src[0].im);
                    z = remap_log(z, log_factor);

                    Y += z * yf;
                    U += z * yf * sinf(2.f * M_PI * ch * yf);
                    V += z * yf * cosf(2.f * M_PI * ch * yf);
                }

                dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
                dstU[0] = av_clip_uint8(lrintf(U * 255.f));
                dstV[0] = av_clip_uint8(lrintf(V * 255.f));
                if (dstA)
                    dstA[0] = dstY[0];
            }
            break;
        case 2:
            Y = hypotf(src[0].re, src[0].im);
            Y = remap_log(Y, log_factor);
            U = atan2f(src[0].im, src[0].re);
            U = 0.5f + 0.5f * U * Y / M_PI;
            V = 1.f - U;

            dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
            dstU[0] = av_clip_uint8(lrintf(U * 255.f));
            dstV[0] = av_clip_uint8(lrintf(V * 255.f));
            if (dstA)
                dstA[0] = dstY[0];
            break;
        case 1:
            Y = atan2f(src[0].im, src[0].re);
            Y = 0.5f + 0.5f * Y / M_PI;

            dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
            if (dstA)
                dstA[0] = dstY[0];
            break;
        case 0:
            Y = hypotf(src[0].re, src[0].im);
            Y = remap_log(Y, log_factor);

            dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
            if (dstA)
                dstA[0] = dstY[0];
            break;
        }
    }

    return 0;
}

static int run_channel_cwt(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ShowCWTContext *s = ctx->priv;
    const int ch = *(int *)arg;
    AVComplexFloat *dst = (AVComplexFloat *)s->fft_out->extended_data[ch];
    const int output_sample_count = s->output_sample_count;
    const int ihop_size = s->ihop_size;
    const int ioffset = (s->output_padding_size - ihop_size) >> 1;
    const int count = s->frequency_band_count;
    const int start = (count * jobnr) / nb_jobs;
    const int end = (count * (jobnr+1)) / nb_jobs;

    for (int y = start; y < end; y++) {
        AVComplexFloat *isrc = (AVComplexFloat *)s->ifft_in->extended_data[y];
        AVComplexFloat *idst = (AVComplexFloat *)s->ifft_out->extended_data[y];
        AVComplexFloat *chout = ((AVComplexFloat *)s->ch_out->extended_data[ch]) + y * ihop_size;
        const float *kernel = (const float *)s->kernel->extended_data[y];
        const unsigned *index = (const unsigned *)s->index;
        const int kernel_start = s->kernel_start[y];
        const int kernel_stop = s->kernel_stop[y];

        memset(isrc, 0, sizeof(*isrc) * output_sample_count);
        for (int i = kernel_start; i < kernel_stop; i++) {
            const unsigned n = index[i];
            const float ff = kernel[i];

            isrc[n].re += ff * dst[i].re;
            isrc[n].im += ff * dst[i].im;
        }

        s->itx_fn(s->ifft[jobnr], idst, isrc, sizeof(*isrc));

        memcpy(chout, idst + ioffset, sizeof(*chout) * ihop_size);
    }

    return 0;
}

static void compute_kernel(AVFilterContext *ctx)
{
    ShowCWTContext *s = ctx->priv;
    const int size = s->input_sample_count;
    const float scale_factor = 1.f/(float)size;
    const int output_sample_count = s->output_sample_count;
    const int fsize = s->frequency_band_count;
    unsigned *index = s->index;

    for (int y = 0; y < fsize; y++) {
        float *kernel = (float *)s->kernel->extended_data[y];
        int *kernel_start = s->kernel_start;
        int *kernel_stop = s->kernel_stop;
        float frequency = s->frequency_band[y*2];
        float deviation = 1.f / (s->frequency_band[y*2+1] * output_sample_count);

        for (int n = 0; n < size; n++) {
            float ff, f = fabsf(n-frequency);

            f = size - fabsf(f - size);
            ff = expf(-f*f*deviation) * scale_factor;
            kernel[n] = ff;
        }

        for (int n = 0; n < size; n++) {
            if (kernel[n] != 0.f) {
                kernel_start[y] = n;
                break;
            }
        }

        for (int n = 0; n < size; n++) {
            if (kernel[size - n - 1] != 0.f) {
                kernel_stop[y] = size - n;
                break;
            }
        }
    }

    for (int n = 0; n < size; n++)
        index[n] = n % output_sample_count;
}

static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = ctx->inputs[0];
    ShowCWTContext *s = ctx->priv;
    float maximum_frequency = fminf(s->maximum_frequency, inlink->sample_rate * 0.5f);
    float minimum_frequency = s->minimum_frequency;
    float scale = 1.f, factor;
    int ret;

    uninit(ctx);

    switch (s->direction) {
    case DIRECTION_LR:
    case DIRECTION_RL:
        s->frequency_band_count = s->h;
        break;
    case DIRECTION_UD:
    case DIRECTION_DU:
        s->frequency_band_count = s->w;
        break;
    }

    s->new_frame = 1;
    s->nb_threads = FFMIN(s->frequency_band_count, ff_filter_get_nb_threads(ctx));
    s->nb_channels = inlink->ch_layout.nb_channels;
    s->old_pts = AV_NOPTS_VALUE;
    s->eof_pts = AV_NOPTS_VALUE;
    s->nb_consumed_samples = 65536;

    s->input_sample_count = s->nb_consumed_samples;
    s->hop_size = s->nb_consumed_samples >> 1;
    s->input_padding_size = 65536;
    s->output_padding_size = FFMAX(16, s->input_padding_size * s->pps / inlink->sample_rate);

    outlink->w = s->w;
    outlink->h = s->h;
    outlink->sample_aspect_ratio = (AVRational){1,1};

    s->fft_in_size  = FFALIGN(s->input_padding_size, av_cpu_max_align());
    s->fft_out_size = FFALIGN(s->input_padding_size, av_cpu_max_align());

    s->output_sample_count = s->output_padding_size;

    s->ifft_in_size  = FFALIGN(s->output_padding_size, av_cpu_max_align());
    s->ifft_out_size = FFALIGN(s->output_padding_size, av_cpu_max_align());
    s->ihop_size = s->output_padding_size >> 1;

    s->fft = av_calloc(s->nb_threads, sizeof(*s->fft));
    if (!s->fft)
        return AVERROR(ENOMEM);

    for (int n = 0; n < s->nb_threads; n++) {
        ret = av_tx_init(&s->fft[n], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->input_padding_size, &scale, 0);
        if (ret < 0)
            return ret;
    }

    s->ifft = av_calloc(s->nb_threads, sizeof(*s->ifft));
    if (!s->ifft)
        return AVERROR(ENOMEM);

    for (int n = 0; n < s->nb_threads; n++) {
        ret = av_tx_init(&s->ifft[n], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->output_padding_size, &scale, 0);
        if (ret < 0)
            return ret;
    }

    s->frequency_band = av_calloc(s->frequency_band_count, sizeof(*s->frequency_band) * 2);
    s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    s->fft_in = ff_get_audio_buffer(inlink, s->fft_in_size * 2);
    s->fft_out = ff_get_audio_buffer(inlink, s->fft_out_size * 2);
    s->cache[0] = ff_get_audio_buffer(inlink, s->hop_size);
    s->cache[1] = ff_get_audio_buffer(inlink, s->hop_size);
    s->ch_out = ff_get_audio_buffer(inlink, s->frequency_band_count * 2 * s->ihop_size);
    s->ifft_in = av_frame_alloc();
    s->ifft_out = av_frame_alloc();
    s->kernel = av_frame_alloc();
    s->index = av_calloc(s->input_padding_size, sizeof(*s->index));
    s->kernel_start = av_calloc(s->frequency_band_count, sizeof(*s->kernel_start));
    s->kernel_stop = av_calloc(s->frequency_band_count, sizeof(*s->kernel_stop));
    if (!s->outpicref || !s->fft_in || !s->fft_out ||
        !s->ifft_in || !s->ifft_out || !s->kernel_start || !s->kernel_stop ||
        !s->frequency_band || !s->kernel || !s->cache[0] || !s->cache[1] || !s->index)
        return AVERROR(ENOMEM);

    s->ifft_in->format = inlink->format;
    s->ifft_in->nb_samples = s->ifft_in_size * 2;
    s->ifft_in->ch_layout.nb_channels = s->frequency_band_count;
    ret = av_frame_get_buffer(s->ifft_in, 0);
    if (ret < 0)
        return ret;

    s->ifft_out->format = inlink->format;
    s->ifft_out->nb_samples = s->ifft_out_size * 2;
    s->ifft_out->ch_layout.nb_channels = s->frequency_band_count;
    ret = av_frame_get_buffer(s->ifft_out, 0);
    if (ret < 0)
        return ret;

    s->kernel->format = inlink->format;
    s->kernel->nb_samples = s->input_padding_size;
    s->kernel->ch_layout.nb_channels = s->frequency_band_count;
    ret = av_frame_get_buffer(s->kernel, 0);
    if (ret < 0)
        return ret;

    s->outpicref->sample_aspect_ratio = (AVRational){1,1};

    for (int y = 0; y < outlink->h; y++) {
        memset(s->outpicref->data[0] + y * s->outpicref->linesize[0],   0, outlink->w);
        memset(s->outpicref->data[1] + y * s->outpicref->linesize[1], 128, outlink->w);
        memset(s->outpicref->data[2] + y * s->outpicref->linesize[2], 128, outlink->w);
        if (s->outpicref->data[3])
            memset(s->outpicref->data[3] + y * s->outpicref->linesize[3], 0, outlink->w);
    }

    s->outpicref->color_range = AVCOL_RANGE_JPEG;

    factor = s->nb_consumed_samples / (float)inlink->sample_rate;
    minimum_frequency *= factor;
    maximum_frequency *= factor;

    switch (s->frequency_scale) {
    case FSCALE_LOG2:
        minimum_frequency = logf(minimum_frequency) / logf(2.f);
        maximum_frequency = logf(maximum_frequency) / logf(2.f);
        break;
    case FSCALE_BARK:
        minimum_frequency = 6.f * asinhf(minimum_frequency / 600.f);
        maximum_frequency = 6.f * asinhf(maximum_frequency / 600.f);
        break;
    case FSCALE_MEL:
        minimum_frequency = 2595.f * log10f(1.f + minimum_frequency / 700.f);
        maximum_frequency = 2595.f * log10f(1.f + maximum_frequency / 700.f);
        break;
    case FSCALE_ERBS:
        minimum_frequency = 11.17268f * log(1.f + (46.06538f * minimum_frequency) / (minimum_frequency + 14678.49f));
        maximum_frequency = 11.17268f * log(1.f + (46.06538f * maximum_frequency) / (maximum_frequency + 14678.49f));
        break;
    }

    frequency_band(s->frequency_band,
                   s->frequency_band_count, maximum_frequency - minimum_frequency,
                   minimum_frequency, s->frequency_scale, s->deviation);

    av_log(ctx, AV_LOG_DEBUG, "input_sample_count: %d\n", s->input_sample_count);
    av_log(ctx, AV_LOG_DEBUG, "output_sample_count: %d\n", s->output_sample_count);

    switch (s->direction) {
    case DIRECTION_LR: s->pos = 0; break;
    case DIRECTION_RL: s->pos = s->w - 1; break;
    case DIRECTION_UD: s->pos = 0; break;
    case DIRECTION_DU: s->pos = s->h - 1; break;
    }

    s->auto_frame_rate = av_make_q(inlink->sample_rate,
                                   s->hop_size);
    if (strcmp(s->rate_str, "auto")) {
        ret = av_parse_video_rate(&s->frame_rate, s->rate_str);
    } else {
        s->frame_rate = s->auto_frame_rate;
    }
    outlink->frame_rate = s->frame_rate;
    outlink->time_base = av_inv_q(outlink->frame_rate);

    compute_kernel(ctx);

    return 0;
}

static int output_frame(AVFilterContext *ctx)
{
    AVFilterLink *outlink = ctx->outputs[0];
    AVFilterLink *inlink = ctx->inputs[0];
    ShowCWTContext *s = ctx->priv;
    const int nb_planes = 3 + (s->outpicref->data[3] != NULL);
    int ret;

    switch (s->slide) {
    case SLIDE_SCROLL:
        switch (s->direction) {
        case DIRECTION_UD:
            for (int p = 0; p < nb_planes; p++) {
                ptrdiff_t linesize = s->outpicref->linesize[p];

                for (int y = s->h - 1; y > 0; y--) {
                    uint8_t *dst = s->outpicref->data[p] + y * linesize;

                    memmove(dst, dst - linesize, s->w);
                }
            }
            break;
        case DIRECTION_DU:
            for (int p = 0; p < nb_planes; p++) {
                ptrdiff_t linesize = s->outpicref->linesize[p];

                for (int y = 0; y < s->h - 1; y++) {
                    uint8_t *dst = s->outpicref->data[p] + y * linesize;

                    memmove(dst, dst + linesize, s->w);
                }
            }
            break;
        }
        break;
    }

    ff_filter_execute(ctx, draw, NULL, NULL, s->nb_threads);

    switch (s->slide) {
    case SLIDE_REPLACE:
    case SLIDE_FRAME:
        switch (s->direction) {
        case DIRECTION_LR:
            s->pos++;
            if (s->pos >= s->w) {
                s->pos = 0;
                s->new_frame = 1;
            }
            break;
        case DIRECTION_RL:
            s->pos--;
            if (s->pos < 0) {
                s->pos = s->w - 1;
                s->new_frame = 1;
            }
            break;
        case DIRECTION_UD:
            s->pos++;
            if (s->pos >= s->h) {
                s->pos = 0;
                s->new_frame = 1;
            }
            break;
        case DIRECTION_DU:
            s->pos--;
            if (s->pos < 0) {
                s->pos = s->h - 1;
                s->new_frame = 1;
            }
            break;
        }
        break;
    case SLIDE_SCROLL:
        switch (s->direction) {
        case DIRECTION_UD:
        case DIRECTION_LR:
            s->pos = 0;
            break;
        case DIRECTION_RL:
            s->pos = s->w - 1;
            break;
        case DIRECTION_DU:
            s->pos = s->h - 1;
            break;
        }
        break;
    }

    if (s->slide == SLIDE_FRAME && s->eof) {
        switch (s->direction) {
        case DIRECTION_LR:
            for (int p = 0; p < nb_planes; p++) {
                ptrdiff_t linesize = s->outpicref->linesize[p];
                const int size = s->w - s->pos;
                const int fill = p > 0 && p < 3 ? 128 : 0;
                const int x = s->pos;

                for (int y = 0; y < s->h; y++) {
                    uint8_t *dst = s->outpicref->data[p] + y * linesize + x;

                    memset(dst, fill, size);
                }
            }
            break;
        case DIRECTION_RL:
            for (int p = 0; p < nb_planes; p++) {
                ptrdiff_t linesize = s->outpicref->linesize[p];
                const int size = s->w - s->pos;
                const int fill = p > 0 && p < 3 ? 128 : 0;

                for (int y = 0; y < s->h; y++) {
                    uint8_t *dst = s->outpicref->data[p] + y * linesize;

                    memset(dst, fill, size);
                }
            }
            break;
        case DIRECTION_UD:
            for (int p = 0; p < nb_planes; p++) {
                ptrdiff_t linesize = s->outpicref->linesize[p];
                const int fill = p > 0 && p < 3 ? 128 : 0;

                for (int y = s->pos; y < s->h; y++) {
                    uint8_t *dst = s->outpicref->data[p] + y * linesize;

                    memset(dst, fill, s->w);
                }
            }
            break;
        case DIRECTION_DU:
            for (int p = 0; p < nb_planes; p++) {
                ptrdiff_t linesize = s->outpicref->linesize[p];
                const int fill = p > 0 && p < 3 ? 128 : 0;

                for (int y = s->h - s->pos; y >= 0; y--) {
                    uint8_t *dst = s->outpicref->data[p] + y * linesize;

                    memset(dst, fill, s->w);
                }
            }
            break;
        }
    }

    s->new_frame = s->slide == SLIDE_FRAME && (s->new_frame || s->eof);

    if (s->slide != SLIDE_FRAME || s->new_frame == 1) {
        int64_t pts_offset = s->new_frame ?
                             0LL : av_rescale(s->ihop_index, s->hop_size, s->ihop_size);

        s->outpicref->pts = av_rescale_q(s->in_pts + pts_offset, inlink->time_base, outlink->time_base);
        s->outpicref->duration = 1;
    }

    s->ihop_index++;
    if (s->ihop_index >= s->ihop_size)
        s->ihop_index = 0;

    if (s->slide == SLIDE_FRAME && s->new_frame == 0)
        return 1;

    if (s->old_pts < s->outpicref->pts) {
        AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);

        if (!out)
            return AVERROR(ENOMEM);

        ret = av_frame_copy_props(out, s->outpicref);
        if (ret < 0)
            goto fail;

        ret = av_frame_copy(out, s->outpicref);
        if (ret < 0)
            goto fail;

        s->old_pts = s->outpicref->pts;
        s->new_frame = 0;
        ret = ff_filter_frame(outlink, out);
        if (ret <= 0)
            return ret;
fail:
        av_frame_free(&out);
        return ret;
    }

    return 1;
}

static int run_channels_cwt_prepare(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    ShowCWTContext *s = ctx->priv;
    const int count = s->nb_channels;
    const int start = (count * jobnr) / nb_jobs;
    const int end = (count * (jobnr+1)) / nb_jobs;

    for (int ch = start; ch < end; ch++)
        run_channel_cwt_prepare(ctx, arg, jobnr, ch);

    return 0;
}

static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    ShowCWTContext *s = ctx->priv;
    int ret = 0, status;
    int64_t pts;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    if (s->outpicref) {
        AVFrame *fin = NULL;

        if (s->ihop_index == 0) {
            if (!s->eof) {
                ret = ff_inlink_consume_samples(inlink, 1, s->hop_size - s->hop_index, &fin);
                if (ret < 0)
                    return ret;
            }

            if (ret > 0 || s->eof) {
                ff_filter_execute(ctx, run_channels_cwt_prepare, fin, NULL,
                                  FFMIN(s->nb_threads, s->nb_channels));
                if (fin) {
                    if ((s->hop_index == 0 && s->slide != SLIDE_FRAME) || s->new_frame) {
                        s->in_pts = fin->pts;
                        s->new_frame = 0;
                    }
                    s->hop_index += fin->nb_samples;
                    av_frame_free(&fin);
                } else {
                    s->hop_index = s->hop_size;
                }
            }
        }

        if (s->hop_index >= s->hop_size || s->ihop_index > 0) {
            if (s->hop_index) {
                FFSWAP(AVFrame *, s->cache[0], s->cache[1]);
                s->hop_index = 0;
            }

            for (int ch = 0; ch < s->nb_channels && s->ihop_index == 0; ch++) {
                ff_filter_execute(ctx, run_channel_cwt, (void *)&ch, NULL,
                                  s->nb_threads);
            }

            ret = output_frame(ctx);
            if (ret != 1)
                return ret;
        }
    }

    if (s->eof && s->eof_pts != AV_NOPTS_VALUE &&
        (s->old_pts + 1 >= s->eof_pts || (s->slide == SLIDE_FRAME))) {
        if (s->slide == SLIDE_FRAME)
            ret = output_frame(ctx);
        ff_outlink_set_status(outlink, AVERROR_EOF, s->eof_pts);
        return ret;
    }

    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF) {
            s->eof = 1;
            ff_filter_set_ready(ctx, 10);
            s->eof_pts = av_rescale_q(pts, inlink->time_base, outlink->time_base);
            return 0;
        }
    }

    if (ff_inlink_queued_samples(inlink) > 0 || s->ihop_index ||
        s->hop_index >= s->hop_size || s->eof) {
        ff_filter_set_ready(ctx, 10);
        return 0;
    }

    if (ff_outlink_frame_wanted(outlink)) {
        ff_inlink_request_frame(inlink);
        return 0;
    }

    return FFERROR_NOT_READY;
}

static const AVFilterPad showcwt_inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
};

static const AVFilterPad showcwt_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
    },
};

const AVFilter ff_avf_showcwt = {
    .name          = "showcwt",
    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to a CWT (Continuous Wavelet Transform) spectrum video output."),
    .uninit        = uninit,
    .priv_size     = sizeof(ShowCWTContext),
    FILTER_INPUTS(showcwt_inputs),
    FILTER_OUTPUTS(showcwt_outputs),
    FILTER_QUERY_FUNC(query_formats),
    .activate      = activate,
    .priv_class    = &showcwt_class,
    .flags
                   = AVFILTER_FLAG_SLICE_THREADS,
};
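
/*
 * Example invocation (a sketch, not part of the filter source): assuming an
 * FFmpeg build that includes this filter, a command along these lines renders
 * the CWT spectrum of an audio file to video. "input.wav", the output name
 * and the chosen option values are placeholders; the options used ("s",
 * "scale", "slide") are the ones defined in showcwt_options above.
 *
 *   ffmpeg -i input.wav -filter_complex "showcwt=s=1280x720:scale=mel:slide=scroll" output.mkv
 */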