FFmpeg  4.3.9
vf_dnn_processing.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Guo Yejun
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * implementing a generic image processing filter using deep learning networks.
24  */
25 
26 #include "libavformat/avio.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "libavutil/avassert.h"
30 #include "libavutil/imgutils.h"
31 #include "avfilter.h"
32 #include "dnn_interface.h"
33 #include "formats.h"
34 #include "internal.h"
35 #include "libswscale/swscale.h"
36 
/*
 * Private state of the dnn_processing filter: the filter definition sizes its
 * private data with sizeof(DnnProcessingContext) and the callbacks obtain it
 * from AVFilterContext.priv.
 * NOTE(review): most member lines and the closing brace of this struct are
 * absent from this listing; see the full file for the complete layout.
 */
37 typedef struct DnnProcessingContext {
38  const AVClass *class;
39 
44 
47 
48  // input & output of the model at execution time
51 
57 
// Byte offset of member x inside DnnProcessingContext, used by the option table below.
58 #define OFFSET(x) offsetof(DnnProcessingContext, x)
// Flag set attached to every entry of the option table below.
59 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
/*
 * Options of the dnn_processing filter.
 *   dnn_backend : integer in [0, 1], default 0; named constants share the
 *                 "backend" unit: native = 0, tensorflow = 1 (the latter only
 *                 when CONFIG_LIBTENSORFLOW == 1).
 *   model       : path to the model file (string, default NULL).
 *   input       : input name of the model (string, default NULL).
 *   output      : output name of the model (string, default NULL).
 * init() rejects a NULL model, input or output.
 */
60 static const AVOption dnn_processing_options[] = {
61  { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
62  { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
63 #if (CONFIG_LIBTENSORFLOW == 1)
64  { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
65 #endif
66  { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
67  { "input", "input name of the model", OFFSET(model_inputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
68  { "output", "output name of the model", OFFSET(model_outputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
69  { NULL }
70 };
71 
72 AVFILTER_DEFINE_CLASS(dnn_processing);
73 
/**
 * Validate the filter options and load the DNN model.
 *
 * @param context filter instance whose priv data is a DnnProcessingContext
 * @return 0 on success; AVERROR(EINVAL) when the model file, input name or
 *         output name is unset, when the module has no load_model callback,
 *         or when load_model returns NULL; AVERROR(ENOMEM) when
 *         ctx->dnn_module is NULL
 */
74 static av_cold int init(AVFilterContext *context)
75 {
76  DnnProcessingContext *ctx = context->priv;
77 
   // all three string options are mandatory
78  if (!ctx->model_filename) {
79  av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n");
80  return AVERROR(EINVAL);
81  }
82  if (!ctx->model_inputname) {
83  av_log(ctx, AV_LOG_ERROR, "input name of the model network is not specified\n");
84  return AVERROR(EINVAL);
85  }
86  if (!ctx->model_outputname) {
87  av_log(ctx, AV_LOG_ERROR, "output name of the model network is not specified\n");
88  return AVERROR(EINVAL);
89  }
90 
   // NOTE(review): the line numbered 91 is absent from this listing; presumably it
   // assigns ctx->dnn_module for the selected backend -- confirm against the full file.
92  if (!ctx->dnn_module) {
93  av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
94  return AVERROR(ENOMEM);
95  }
96  if (!ctx->dnn_module->load_model) {
97  av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n");
98  return AVERROR(EINVAL);
99  }
100 
   // the model is loaded through the module's callback from the configured file
101  ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename);
102  if (!ctx->model) {
103  av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n");
104  return AVERROR(EINVAL);
105  }
106 
107  return 0;
108 }
109 
/**
 * Advertise the pixel formats accepted by the filter: the same list is set on
 * all links through ff_set_common_formats().
 *
 * @return the result of ff_set_common_formats()
 */
110 static int query_formats(AVFilterContext *context)
111 {
    // NOTE(review): the entries of pix_fmts (lines 113-117) are absent from this listing.
112  static const enum AVPixelFormat pix_fmts[] = {
118  };
    // fmts_list is handed to ff_set_common_formats() without a NULL check here
119  AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
120  return ff_set_common_formats(context, fmts_list);
121 }
122 
/*
 * Log an error naming the frame pixel format and the model's channel count.
 * The expansion is a complete statement (it already ends with ';') and uses
 * the identifiers ctx, fmt and model_input, which must therefore be in scope
 * wherever the macro is expanded.
 */
123 #define LOG_FORMAT_CHANNEL_MISMATCH() \
124  av_log(ctx, AV_LOG_ERROR, \
125  "the frame's format %s does not match " \
126  "the model input channel %d\n", \
127  av_get_pix_fmt_name(fmt), \
128  model_input->channels);
129 
/**
 * Check that the model's declared input is compatible with the input link.
 *
 * A model height/width of -1 matches any link size; any other value must
 * equal the link's. The required channel count and data type depend on the
 * link pixel format:
 *   RGB24, BGR24                             -> 3 channels, DNN_FLOAT or DNN_UINT8
 *   GRAY8                                    -> 1 channel,  DNN_UINT8
 *   GRAYF32, YUV420P/422P/444P/410P/411P     -> 1 channel,  DNN_FLOAT
 *
 * @param model_input input description reported by the model
 * @param inlink      input link to validate against
 * @return 0 when compatible, AVERROR(EIO) otherwise (also for any other
 *         pixel format)
 */
130 static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
131 {
132  AVFilterContext *ctx = inlink->dst;
133  enum AVPixelFormat fmt = inlink->format;
134 
135  // the design is to add explicit scale filter before this filter
136  if (model_input->height != -1 && model_input->height != inlink->h) {
137  av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n",
138  model_input->height, inlink->h);
139  return AVERROR(EIO);
140  }
141  if (model_input->width != -1 && model_input->width != inlink->w) {
142  av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n",
143  model_input->width, inlink->w);
144  return AVERROR(EIO);
145  }
146 
147  switch (fmt) {
148  case AV_PIX_FMT_RGB24:
149  case AV_PIX_FMT_BGR24:
150  if (model_input->channels != 3) {
    // NOTE(review): line 151 is absent from this listing; presumably the
    // LOG_FORMAT_CHANNEL_MISMATCH() call -- confirm against the full file.
152  return AVERROR(EIO);
153  }
154  if (model_input->dt != DNN_FLOAT && model_input->dt != DNN_UINT8) {
155  av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32 and uint8.\n");
156  return AVERROR(EIO);
157  }
158  return 0;
159  case AV_PIX_FMT_GRAY8:
160  if (model_input->channels != 1) {
    // NOTE(review): line 161 is absent from this listing (see note above).
162  return AVERROR(EIO);
163  }
164  if (model_input->dt != DNN_UINT8) {
165  av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type uint8.\n");
166  return AVERROR(EIO);
167  }
168  return 0;
169  case AV_PIX_FMT_GRAYF32:
170  case AV_PIX_FMT_YUV420P:
171  case AV_PIX_FMT_YUV422P:
172  case AV_PIX_FMT_YUV444P:
173  case AV_PIX_FMT_YUV410P:
174  case AV_PIX_FMT_YUV411P:
175  if (model_input->channels != 1) {
    // NOTE(review): line 176 is absent from this listing (see note above).
177  return AVERROR(EIO);
178  }
179  if (model_input->dt != DNN_FLOAT) {
180  av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type float32.\n");
181  return AVERROR(EIO);
182  }
183  return 0;
184  default:
185  av_log(ctx, AV_LOG_ERROR, "%s not supported.\n", av_get_pix_fmt_name(fmt));
186  return AVERROR(EIO);
187  }
188 
    // not reached: every switch branch above returns
189  return 0;
190 }
191 
192 static int config_input(AVFilterLink *inlink)
193 {
194  AVFilterContext *context = inlink->dst;
195  DnnProcessingContext *ctx = context->priv;
196  DNNReturnType result;
197  DNNData model_input;
198  int check;
199 
200  result = ctx->model->get_input(ctx->model->model, &model_input, ctx->model_inputname);
201  if (result != DNN_SUCCESS) {
202  av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
203  return AVERROR(EIO);
204  }
205 
206  check = check_modelinput_inlink(&model_input, inlink);
207  if (check != 0) {
208  return check;
209  }
210 
211  ctx->input.width = inlink->w;
212  ctx->input.height = inlink->h;
213  ctx->input.channels = model_input.channels;
214  ctx->input.dt = model_input.dt;
215 
216  result = (ctx->model->set_input_output)(ctx->model->model,
217  &ctx->input, ctx->model_inputname,
218  (const char **)&ctx->model_outputname, 1);
219  if (result != DNN_SUCCESS) {
220  av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n");
221  return AVERROR(EIO);
222  }
223 
224  return 0;
225 }
226 
/**
 * Create the swscale contexts used to move data between frames and the model.
 *
 * RGB24/BGR24: sws_gray8_to_grayf32 is created only when the model input type
 * is DNN_FLOAT and sws_grayf32_to_gray8 only when the model output type is
 * DNN_FLOAT; both are sized (width * 3) x height, i.e. one packed row is
 * treated as a single row three times as wide.
 * YUV420P/422P/444P/410P/411P: both model types are asserted to be DNN_FLOAT
 * and both conversion contexts are created at link size; when input and
 * output sizes differ, sws_uv_scale is additionally created as a
 * GRAY8 -> GRAY8 scaler at chroma-plane dimensions.
 * Other formats: nothing is created.
 *
 * NOTE(review): the pixel-format arguments of most sws_getContext() calls,
 * the declaration of desc and the tail of the sws_uv_scale call are absent
 * from this listing.
 * NOTE(review): none of the sws_getContext() results is tested for NULL in
 * the visible code.
 *
 * @param outlink output link (its dimensions must already be set)
 * @return 0 on every visible path
 */
227 static int prepare_sws_context(AVFilterLink *outlink)
228 {
229  AVFilterContext *context = outlink->src;
230  DnnProcessingContext *ctx = context->priv;
231  AVFilterLink *inlink = context->inputs[0];
232  enum AVPixelFormat fmt = inlink->format;
233  DNNDataType input_dt = ctx->input.dt;
234  DNNDataType output_dt = ctx->output.dt;
235 
236  switch (fmt) {
237  case AV_PIX_FMT_RGB24:
238  case AV_PIX_FMT_BGR24:
239  if (input_dt == DNN_FLOAT) {
240  ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w * 3,
241  inlink->h,
243  inlink->w * 3,
244  inlink->h,
246  0, NULL, NULL, NULL);
247  }
248  if (output_dt == DNN_FLOAT) {
249  ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w * 3,
250  outlink->h,
252  outlink->w * 3,
253  outlink->h,
255  0, NULL, NULL, NULL);
256  }
257  return 0;
258  case AV_PIX_FMT_YUV420P:
259  case AV_PIX_FMT_YUV422P:
260  case AV_PIX_FMT_YUV444P:
261  case AV_PIX_FMT_YUV410P:
262  case AV_PIX_FMT_YUV411P:
263  av_assert0(input_dt == DNN_FLOAT);
264  av_assert0(output_dt == DNN_FLOAT);
265  ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w,
266  inlink->h,
268  inlink->w,
269  inlink->h,
271  0, NULL, NULL, NULL);
272  ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w,
273  outlink->h,
275  outlink->w,
276  outlink->h,
278  0, NULL, NULL, NULL);
279 
    // a chroma scaler is only needed when the model changes the frame size
280  if (inlink->w != outlink->w || inlink->h != outlink->h) {
282  int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
283  int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
284  int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h);
285  int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w);
286  ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
287  sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
    // source chroma height is remembered for the sws_scale() calls in copy_uv_planes
289  ctx->sws_uv_height = sws_src_h;
290  }
291  return 0;
292  default:
293  //do nothing
294  break;
295  }
296 
297  return 0;
298 }
299 
300 static int config_output(AVFilterLink *outlink)
301 {
302  AVFilterContext *context = outlink->src;
303  DnnProcessingContext *ctx = context->priv;
304  DNNReturnType result;
305 
306  // have a try run in case that the dnn model resize the frame
307  result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, 1);
308  if (result != DNN_SUCCESS){
309  av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
310  return AVERROR(EIO);
311  }
312 
313  outlink->w = ctx->output.width;
314  outlink->h = ctx->output.height;
315 
316  prepare_sws_context(outlink);
317 
318  return 0;
319 }
320 
322 {
    /*
     * Copy the pixel data of frame into the model input buffer
     * (ctx->input.data). Returns 0, or AVERROR(EIO) for a pixel format that
     * has no case below.
     * NOTE(review): the signature line of this function is absent from this
     * listing, and bytewidth is used without checking for an error value.
     */
323  int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
324  DNNData *dnn_input = &ctx->input;
325 
326  switch (frame->format) {
327  case AV_PIX_FMT_RGB24:
328  case AV_PIX_FMT_BGR24:
329  if (dnn_input->dt == DNN_FLOAT) {
    // converted through swscale; destination stride is width * 3 floats per row
330  sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize,
331  0, frame->height, (uint8_t * const*)(&dnn_input->data),
332  (const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0});
333  } else {
    // uint8 model input: plane 0 is copied as-is, packed to bytewidth per row
334  av_assert0(dnn_input->dt == DNN_UINT8);
335  av_image_copy_plane(dnn_input->data, bytewidth,
336  frame->data[0], frame->linesize[0],
337  bytewidth, frame->height);
338  }
339  return 0;
340  case AV_PIX_FMT_GRAY8:
341  case AV_PIX_FMT_GRAYF32:
    // plane 0 is copied as-is, packed to bytewidth per row
342  av_image_copy_plane(dnn_input->data, bytewidth,
343  frame->data[0], frame->linesize[0],
344  bytewidth, frame->height);
345  return 0;
346  case AV_PIX_FMT_YUV420P:
347  case AV_PIX_FMT_YUV422P:
348  case AV_PIX_FMT_YUV444P:
349  case AV_PIX_FMT_YUV410P:
350  case AV_PIX_FMT_YUV411P:
    // converted through swscale into the single model buffer, width floats per row
351  sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize,
352  0, frame->height, (uint8_t * const*)(&dnn_input->data),
353  (const int [4]){frame->width * sizeof(float), 0, 0, 0});
354  return 0;
355  default:
356  return AVERROR(EIO);
357  }
358 
    // not reached: every switch branch above returns
359  return 0;
360 }
361 
363 {
    /*
     * Copy the model output buffer (ctx->output.data) into plane 0 of frame.
     * Returns 0, or AVERROR(EIO) for a pixel format that has no case below.
     * NOTE(review): the signature line of this function is absent from this
     * listing, and bytewidth is used without checking for an error value.
     */
364  int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
365  DNNData *dnn_output = &ctx->output;
366 
367  switch (frame->format) {
368  case AV_PIX_FMT_RGB24:
369  case AV_PIX_FMT_BGR24:
370  if (dnn_output->dt == DNN_FLOAT) {
    // converted through swscale; source stride is width * 3 floats per row
371  sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
372  (const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0},
373  0, frame->height, (uint8_t * const*)frame->data, frame->linesize);
374 
375  } else {
    // uint8 model output: copied as-is, source rows packed to bytewidth
376  av_assert0(dnn_output->dt == DNN_UINT8);
377  av_image_copy_plane(frame->data[0], frame->linesize[0],
378  dnn_output->data, bytewidth,
379  bytewidth, frame->height);
380  }
381  return 0;
382  case AV_PIX_FMT_GRAY8:
383  // it is possible that data type of dnn output is float32,
384  // need to add support for such case when needed.
385  av_assert0(dnn_output->dt == DNN_UINT8);
386  av_image_copy_plane(frame->data[0], frame->linesize[0],
387  dnn_output->data, bytewidth,
388  bytewidth, frame->height);
389  return 0;
390  case AV_PIX_FMT_GRAYF32:
391  av_assert0(dnn_output->dt == DNN_FLOAT);
392  av_image_copy_plane(frame->data[0], frame->linesize[0],
393  dnn_output->data, bytewidth,
394  bytewidth, frame->height);
395  return 0;
396  case AV_PIX_FMT_YUV420P:
397  case AV_PIX_FMT_YUV422P:
398  case AV_PIX_FMT_YUV444P:
399  case AV_PIX_FMT_YUV410P:
400  case AV_PIX_FMT_YUV411P:
    // converted through swscale; source stride is width floats per row
401  sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0},
402  (const int[4]){frame->width * sizeof(float), 0, 0, 0},
403  0, frame->height, (uint8_t * const*)frame->data, frame->linesize);
404  return 0;
405  default:
406  return AVERROR(EIO);
407  }
408 
    // not reached: every switch branch above returns
409  return 0;
410 }
411 
413 {
    /*
     * Returns non-zero when the descriptor of pix_fmt does not carry
     * AV_PIX_FMT_FLAG_RGB and has exactly 3 components; asserts that a
     * descriptor exists. No planar flag is tested here.
     * NOTE(review): the signature line of this function is absent from this listing.
     */
414  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
415  av_assert0(desc);
416  return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3;
417 }
418 
420 {
    /*
     * Fill planes 1 and 2 of out from planes 1 and 2 of in.
     * Without a sws_uv_scale context the two frames must have equal size
     * (asserted) and the planes are copied row by row; otherwise each plane
     * is scaled separately through sws_uv_scale. Always returns 0.
     * NOTE(review): the signature line of this function is absent from this
     * listing, and desc is dereferenced without a NULL check.
     */
421  const AVPixFmtDescriptor *desc;
422  int uv_height;
423 
424  if (!ctx->sws_uv_scale) {
425  av_assert0(in->height == out->height && in->width == out->width);
426  desc = av_pix_fmt_desc_get(in->format);
    // chroma plane height: frame height shifted right by log2_chroma_h, rounded up
427  uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h);
428  for (int i = 1; i < 3; ++i) {
429  int bytewidth = av_image_get_linesize(in->format, in->width, i);
430  av_image_copy_plane(out->data[i], out->linesize[i],
431  in->data[i], in->linesize[i],
432  bytewidth, uv_height);
433  }
434  } else {
    // data + 1 / data + 2 make the chroma plane appear as plane 0 to the GRAY8 scaler
435  sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1,
436  0, ctx->sws_uv_height, out->data + 1, out->linesize + 1);
437  sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2,
438  0, ctx->sws_uv_height, out->data + 2, out->linesize + 2);
439  }
440 
441  return 0;
442 }
443 
444 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
445 {
446  AVFilterContext *context = inlink->dst;
447  AVFilterLink *outlink = context->outputs[0];
448  DnnProcessingContext *ctx = context->priv;
449  DNNReturnType dnn_result;
450  AVFrame *out;
451 
452  copy_from_frame_to_dnn(ctx, in);
453 
454  dnn_result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, 1);
455  if (dnn_result != DNN_SUCCESS){
456  av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
457  av_frame_free(&in);
458  return AVERROR(EIO);
459  }
460 
461  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
462  if (!out) {
463  av_frame_free(&in);
464  return AVERROR(ENOMEM);
465  }
466 
467  av_frame_copy_props(out, in);
468  copy_from_dnn_to_frame(ctx, out);
469 
470  if (isPlanarYUV(in->format))
471  copy_uv_planes(ctx, out, in);
472 
473  av_frame_free(&in);
474  return ff_filter_frame(outlink, out);
475 }
476 
478 {
    /*
     * Release the resources held by the filter's private context.
     * NOTE(review): the signature line and lines 481-482 are absent from this
     * listing; presumably the latter free the other two swscale contexts --
     * confirm against the full file.
     */
479  DnnProcessingContext *context = ctx->priv;
480 
483  sws_freeContext(context->sws_uv_scale);
484 
    // the model is released through the module's callback, so only when a module exists
485  if (context->dnn_module)
486  (context->dnn_module->free_model)(&context->model);
487 
488  av_freep(&context->dnn_module);
489 }
490 
    // Single video input pad: config_input runs at link configuration,
    // filter_frame receives each frame.
    // NOTE(review): the declaration line of this array is absent from this listing.
492  {
493  .name = "default",
494  .type = AVMEDIA_TYPE_VIDEO,
495  .config_props = config_input,
496  .filter_frame = filter_frame,
497  },
498  { NULL }
499 };
500 
    // Single video output pad: config_output sets the output link dimensions.
    // NOTE(review): the declaration line of this array is absent from this listing.
502  {
503  .name = "default",
504  .type = AVMEDIA_TYPE_VIDEO,
505  .config_props = config_output,
506  },
507  { NULL }
508 };
509 
    // Filter definition tying together the callbacks, pads, private context size
    // and option class declared in this file.
    // NOTE(review): the declaration line and line 516 are absent from this listing.
511  .name = "dnn_processing",
512  .description = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
513  .priv_size = sizeof(DnnProcessingContext),
514  .init = init,
515  .uninit = uninit,
517  .inputs = dnn_processing_inputs,
518  .outputs = dnn_processing_outputs,
519  .priv_class = &dnn_processing_class,
520 };
void * model
Definition: dnn_interface.h:45
#define NULL
Definition: coverity.c:32
int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)
Compute the size of an image line with format pix_fmt and width width for the plane plane...
Definition: imgutils.c:76
static enum AVPixelFormat pix_fmt
Buffered I/O operations.
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2549
This structure describes decoded (raw) audio or video data.
Definition: frame.h:300
#define SWS_BICUBIC
Definition: swscale.h:60
AVOption.
Definition: opt.h:246
static const AVOption dnn_processing_options[]
void(* free_model)(DNNModel **model)
Definition: dnn_interface.h:61
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
misc image utilities
Main libavfilter public API header.
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:68
const char * desc
Definition: nvenc.c:79
struct SwsContext * sws_uv_scale
#define OFFSET(x)
int channels
Definition: dnn_interface.h:40
static const AVFilterPad dnn_processing_outputs[]
struct SwsContext * sws_grayf32_to_gray8
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:104
static av_cold void uninit(AVFilterContext *ctx)
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:92
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:300
const char * name
Pad name.
Definition: internal.h:60
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
DNNModel *(* load_model)(const char *model_filename)
Definition: dnn_interface.h:57
AVFilter ff_vf_dnn_processing
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1075
uint8_t
#define av_cold
Definition: attributes.h:88
AVOptions.
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
static AVFrame * frame
struct SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
Definition: utils.c:1899
static int config_input(AVFilterLink *inlink)
DNNBackendType
Definition: dnn_interface.h:33
external API header
#define av_log(a,...)
DNNDataType
Definition: dnn_interface.h:35
A filter pad used for either input or output.
Definition: internal.h:54
static int copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *frame)
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
Definition: dnn_interface.h:48
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
int width
Definition: frame.h:358
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:605
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:101
int height
Definition: dnn_interface.h:40
static av_cold int init(AVFilterContext *context)
#define AVERROR(e)
Definition: error.h:43
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:148
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:188
void * data
Definition: dnn_interface.h:38
void * priv
private data for use by the filter
Definition: avfilter.h:353
simple assert() macros that are a bit more flexible than ISO C assert().
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
Definition: pixdesc.h:106
uint8_t nb_components
The number of components each pixel has, (1-4)
Definition: pixdesc.h:83
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
AVFormatContext * ctx
Definition: movenc.c:48
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2319
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:69
static const AVFilterPad dnn_processing_inputs[]
static int prepare_sws_context(AVFilterLink *outlink)
AVFILTER_DEFINE_CLASS(dnn_processing)
DNN inference engine interface.
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
DNNReturnType
Definition: dnn_interface.h:31
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:373
#define AV_PIX_FMT_GRAYF32
Definition: pixfmt.h:429
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:331
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
swscale wrapper, so we don't need to export the SwsContext.
Definition: swscale.c:744
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Definition: pixfmt.h:72
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
static int query_formats(AVFilterContext *context)
static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame)
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
const char * name
Filter name.
Definition: avfilter.h:148
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:275
DNNModule * ff_get_dnn_module(DNNBackendType backend_type)
Definition: dnn_interface.c:31
DNNReturnType(* execute_model)(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
Definition: dnn_interface.h:59
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:314
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
Y , 8bpp.
Definition: pixfmt.h:74
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
Definition: pixfmt.h:73
#define LOG_FORMAT_CHANNEL_MISMATCH()
A list of supported formats for one end of a filter link.
Definition: formats.h:64
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:358
FILE * out
Definition: movenc.c:54
struct SwsContext * sws_gray8_to_grayf32
#define av_freep(p)
DNNReturnType(* set_input_output)(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output)
Definition: dnn_interface.h:51
#define av_always_inline
Definition: attributes.h:45
DNNBackendType backend_type
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:338
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2465
internal API functions
static int config_output(AVFilterLink *outlink)
DNNDataType dt
Definition: dnn_interface.h:39
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:659
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
#define FLAGS
#define check(x, y, S, v)