FFmpeg  4.3.9
vf_overlay_cuda.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Yaroslav Pogrebnyak <yyyaroslav@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * Overlay one video on top of another using cuda hardware acceleration
24  */
25 
26 #include "libavutil/log.h"
27 #include "libavutil/mem.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/pixdesc.h"
30 #include "libavutil/hwcontext.h"
32 #include "libavutil/cuda_check.h"
33 
34 #include "avfilter.h"
35 #include "framesync.h"
36 #include "internal.h"
37 
/*
 * Run a CUDA driver call through FF_CUDA_CHECK_DL, handing it the filter
 * context and that context's dynamically loaded CUDA function table.
 * The expansion refers to a variable named `ctx`, so it can only be used where
 * an OverlayCUDAContext pointer with that exact name is in scope.
 * Callers in this file treat a negative result as failure.
 * NOTE(review): the logging/error-mapping details live in
 * libavutil/cuda_check.h and are not visible here - confirm there.
 */
38 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, ctx->hwctx->internal->cuda_dl, x)
/*
 * Integer division of a by b, rounded up (intended for positive operands).
 * Used to compute how many blocks are needed to cover a plane.
 * `b` appears three times in the expansion, so do not pass an argument with
 * side effects.
 */
39 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
40 
/*
 * Launch geometry for the overlay kernel: BLOCK_X x BLOCK_Y is passed to
 * cuLaunchKernel as the block dimensions, and the same values are the divisors
 * used with DIV_UP to derive the grid dimensions from the plane size.
 */
41 #define BLOCK_X 32
42 #define BLOCK_Y 16
43 
44 static const enum AVPixelFormat supported_main_formats[] = {
48 };
49 
55 };
56 
57 /**
58  * OverlayCUDAContext
59  */
60 typedef struct OverlayCUDAContext {
61  const AVClass *class;
62 
65 
68 
69  CUcontext cu_ctx;
70  CUmodule cu_module;
71  CUfunction cu_func;
72  CUstream cu_stream;
73 
75 
78 
80 
81 /**
82  * Helper to find out if provided format is supported by filter
83  */
84 static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt)
85 {
86  for (int i = 0; formats[i] != AV_PIX_FMT_NONE; i++)
87  if (formats[i] == fmt)
88  return 1;
89  return 0;
90 }
91 
92 /**
93  * Helper checks if we can process main and overlay pixel formats
94  */
95 static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay) {
96  switch(format_main) {
97  case AV_PIX_FMT_NV12:
98  return format_overlay == AV_PIX_FMT_NV12;
99  case AV_PIX_FMT_YUV420P:
100  return format_overlay == AV_PIX_FMT_YUV420P ||
101  format_overlay == AV_PIX_FMT_YUVA420P;
102  default:
103  return 0;
104  }
105 }
106 
107 /**
108  * Call overlay kernel for a plane
109  */
112  int x_position, int y_position,
113  uint8_t* main_data, int main_linesize,
114  int main_width, int main_height,
115  uint8_t* overlay_data, int overlay_linesize,
116  int overlay_width, int overlay_height,
117  uint8_t* alpha_data, int alpha_linesize,
118  int alpha_adj_x, int alpha_adj_y) {
119 
120  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
121 
122  void* kernel_args[] = {
124  &main_data, &main_linesize,
125  &overlay_data, &overlay_linesize,
126  &overlay_width, &overlay_height,
127  &alpha_data, &alpha_linesize,
128  &alpha_adj_x, &alpha_adj_y,
129  };
130 
131  return CHECK_CU(cu->cuLaunchKernel(
132  ctx->cu_func,
133  DIV_UP(main_width, BLOCK_X), DIV_UP(main_height, BLOCK_Y), 1,
134  BLOCK_X, BLOCK_Y, 1,
135  0, ctx->cu_stream, kernel_args, NULL));
136 }
137 
138 /**
139  * Perform blend overlay picture over main picture
140  */
142 {
143  int ret;
144 
145  AVFilterContext *avctx = fs->parent;
146  OverlayCUDAContext *ctx = avctx->priv;
147  AVFilterLink *outlink = avctx->outputs[0];
148 
149  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
150  CUcontext dummy, cuda_ctx = ctx->hwctx->cuda_ctx;
151 
152  AVFrame *input_main, *input_overlay;
153 
154  ctx->cu_ctx = cuda_ctx;
155 
156  // read main and overlay frames from inputs
157  ret = ff_framesync_dualinput_get(fs, &input_main, &input_overlay);
158  if (ret < 0)
159  return ret;
160 
161  if (!input_main)
162  return AVERROR_BUG;
163 
164  if (!input_overlay)
165  return ff_filter_frame(outlink, input_main);
166 
167  ret = av_frame_make_writable(input_main);
168  if (ret < 0) {
169  av_frame_free(&input_main);
170  return ret;
171  }
172 
173  // push cuda context
174 
175  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
176  if (ret < 0) {
177  av_frame_free(&input_main);
178  return ret;
179  }
180 
181  // overlay first plane
182 
184  ctx->x_position, ctx->y_position,
185  input_main->data[0], input_main->linesize[0],
186  input_main->width, input_main->height,
187  input_overlay->data[0], input_overlay->linesize[0],
188  input_overlay->width, input_overlay->height,
189  input_overlay->data[3], input_overlay->linesize[3], 1, 1);
190 
191  // overlay rest planes depending on pixel format
192 
193  switch(ctx->in_format_overlay) {
194  case AV_PIX_FMT_NV12:
196  ctx->x_position, ctx->y_position / 2,
197  input_main->data[1], input_main->linesize[1],
198  input_main->width, input_main->height / 2,
199  input_overlay->data[1], input_overlay->linesize[1],
200  input_overlay->width, input_overlay->height / 2,
201  0, 0, 0, 0);
202  break;
203  case AV_PIX_FMT_YUV420P:
204  case AV_PIX_FMT_YUVA420P:
206  ctx->x_position / 2 , ctx->y_position / 2,
207  input_main->data[1], input_main->linesize[1],
208  input_main->width / 2, input_main->height / 2,
209  input_overlay->data[1], input_overlay->linesize[1],
210  input_overlay->width / 2, input_overlay->height / 2,
211  input_overlay->data[3], input_overlay->linesize[3], 2, 2);
212 
214  ctx->x_position / 2 , ctx->y_position / 2,
215  input_main->data[2], input_main->linesize[2],
216  input_main->width / 2, input_main->height / 2,
217  input_overlay->data[2], input_overlay->linesize[2],
218  input_overlay->width / 2, input_overlay->height / 2,
219  input_overlay->data[3], input_overlay->linesize[3], 2, 2);
220  break;
221  default:
222  av_log(ctx, AV_LOG_ERROR, "Passed unsupported overlay pixel format\n");
223  av_frame_free(&input_main);
224  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
225  return AVERROR_BUG;
226  }
227 
228  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
229 
230  return ff_filter_frame(outlink, input_main);
231 }
232 
233 /**
234  * Initialize overlay_cuda
235  */
237 {
238  OverlayCUDAContext* ctx = avctx->priv;
240 
241  return 0;
242 }
243 
244 /**
245  * Uninitialize overlay_cuda
246  */
248 {
249  OverlayCUDAContext* ctx = avctx->priv;
250 
251  ff_framesync_uninit(&ctx->fs);
252 
253  if (ctx->hwctx && ctx->cu_module) {
254  CUcontext dummy;
255  CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
256  CHECK_CU(cu->cuCtxPushCurrent(ctx->cu_ctx));
257  CHECK_CU(cu->cuModuleUnload(ctx->cu_module));
258  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
259  }
260 
262  ctx->hwctx = NULL;
263 }
264 
265 /**
266  * Activate overlay_cuda
267  */
269 {
270  OverlayCUDAContext *ctx = avctx->priv;
271 
272  return ff_framesync_activate(&ctx->fs);
273 }
274 
275 /**
276  * Query formats
277  */
279 {
280  static const enum AVPixelFormat pixel_formats[] = {
282  };
283 
284  AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
285 
286  return ff_set_common_formats(avctx, pix_fmts);
287 }
288 
289 /**
290  * Configure output
291  */
293 {
294 
295  extern char vf_overlay_cuda_ptx[];
296 
297  int err;
298  AVFilterContext* avctx = outlink->src;
299  OverlayCUDAContext* ctx = avctx->priv;
300 
301  AVFilterLink *inlink = avctx->inputs[0];
302  AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
303 
304  AVFilterLink *inlink_overlay = avctx->inputs[1];
305  AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data;
306 
307  CUcontext dummy, cuda_ctx;
308  CudaFunctions *cu;
309 
310  // check main input formats
311 
312  if (!frames_ctx) {
313  av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n");
314  return AVERROR(EINVAL);
315  }
316 
317  ctx->in_format_main = frames_ctx->sw_format;
319  av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s\n",
321  return AVERROR(ENOSYS);
322  }
323 
324  // check overlay input formats
325 
326  if (!frames_ctx_overlay) {
327  av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n");
328  return AVERROR(EINVAL);
329  }
330 
331  ctx->in_format_overlay = frames_ctx_overlay->sw_format;
333  av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s\n",
335  return AVERROR(ENOSYS);
336  }
337 
338  // check we can overlay pictures with those pixel formats
339 
341  av_log(ctx, AV_LOG_ERROR, "Can't overlay %s on %s \n",
343  return AVERROR(EINVAL);
344  }
345 
346  // initialize
347 
348  ctx->hw_device_ctx = av_buffer_ref(frames_ctx->device_ref);
349  if (!ctx->hw_device_ctx)
350  return AVERROR(ENOMEM);
351  ctx->hwctx = ((AVHWDeviceContext*)ctx->hw_device_ctx->data)->hwctx;
352 
353  cuda_ctx = ctx->hwctx->cuda_ctx;
354  ctx->fs.time_base = inlink->time_base;
355 
356  ctx->cu_stream = ctx->hwctx->stream;
357 
358  outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
359  if (!outlink->hw_frames_ctx)
360  return AVERROR(ENOMEM);
361 
362  // load functions
363 
364  cu = ctx->hwctx->internal->cuda_dl;
365 
366  err = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
367  if (err < 0) {
368  return err;
369  }
370 
371  err = CHECK_CU(cu->cuModuleLoadData(&ctx->cu_module, vf_overlay_cuda_ptx));
372  if (err < 0) {
373  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
374  return err;
375  }
376 
377  err = CHECK_CU(cu->cuModuleGetFunction(&ctx->cu_func, ctx->cu_module, "Overlay_Cuda"));
378  if (err < 0) {
379  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
380  return err;
381  }
382 
383  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
384 
385  // init dual input
386 
387  err = ff_framesync_init_dualinput(&ctx->fs, avctx);
388  if (err < 0) {
389  return err;
390  }
391 
392  return ff_framesync_configure(&ctx->fs);
393 }
394 
395 
/* Byte offset of member x inside OverlayCUDAContext; used by the option table
 * below to say where each option value is stored. */
396 #define OFFSET(x) offsetof(OverlayCUDAContext, x)
/* Option flags shared by every entry of the option table below. */
397 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
398 
399 static const AVOption overlay_cuda_options[] = {
400  { "x", "Overlay x position",
401  OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
402  { "y", "Overlay y position",
403  OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
404  { "eof_action", "Action to take when encountering EOF from secondary input ",
406  EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
407  { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
408  { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
409  { "pass", "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS }, .flags = FLAGS, "eof_action" },
410  { "shortest", "force termination when the shortest input terminates", OFFSET(fs.opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
411  { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
412  { NULL },
413 };
414 
416 
418  {
419  .name = "main",
420  .type = AVMEDIA_TYPE_VIDEO,
421  },
422  {
423  .name = "overlay",
424  .type = AVMEDIA_TYPE_VIDEO,
425  },
426  { NULL }
427 };
428 
430  {
431  .name = "default",
432  .type = AVMEDIA_TYPE_VIDEO,
433  .config_props = &overlay_cuda_config_output,
434  },
435  { NULL }
436 };
437 
439  .name = "overlay_cuda",
440  .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another using CUDA"),
441  .priv_size = sizeof(OverlayCUDAContext),
442  .priv_class = &overlay_cuda_class,
447  .inputs = overlay_cuda_inputs,
448  .outputs = overlay_cuda_outputs,
449  .preinit = overlay_cuda_framesync_preinit,
450  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
451 };
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:61
#define NULL
Definition: coverity.c:32
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:365
#define FLAGS
FRAMESYNC_DEFINE_CLASS(overlay_cuda, OverlayCUDAContext, fs)
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it...
Definition: buffer.c:125
This structure describes decoded (raw) audio or video data.
Definition: frame.h:300
AVOption.
Definition: opt.h:246
Main libavfilter public API header.
Memory handling functions.
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
AVCUDADeviceContextInternal * internal
static enum AVPixelFormat supported_overlay_formats[]
int ff_framesync_configure(FFFrameSync *fs)
Configure a frame sync structure.
Definition: framesync.c:117
#define BLOCK_X
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:300
const char * name
Pad name.
Definition: internal.h:60
AVFilterContext * parent
Parent filter context.
Definition: framesync.h:152
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:346
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1075
static int overlay_cuda_call_kernel(OverlayCUDAContext *ctx, int x_position, int y_position, uint8_t *main_data, int main_linesize, int main_width, int main_height, uint8_t *overlay_data, int overlay_linesize, int overlay_width, int overlay_height, uint8_t *alpha_data, int alpha_linesize, int alpha_adj_x, int alpha_adj_y)
Call overlay kernel for a plane.
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:101
uint8_t
#define av_cold
Definition: attributes.h:88
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
AVOptions.
int ff_framesync_init_dualinput(FFFrameSync *fs, AVFilterContext *parent)
Initialize a frame sync structure for dualinput.
Definition: framesync.c:351
#define BLOCK_Y
int ff_framesync_dualinput_get(FFFrameSync *fs, AVFrame **f0, AVFrame **f1)
Definition: framesync.c:369
static const AVFilterPad overlay_cuda_inputs[]
AVBufferRef * hw_device_ctx
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt)
Helper to find out if provided format is supported by filter.
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
int width
Definition: frame.h:358
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:605
void ff_framesync_uninit(FFFrameSync *fs)
Free all memory currently allocated.
Definition: framesync.c:283
Frame sync structure.
Definition: framesync.h:146
#define AVERROR(e)
Definition: error.h:43
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:203
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:188
void * priv
private data for use by the filter
Definition: avfilter.h:353
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
int opt_shortest
Definition: framesync.h:206
int ff_framesync_activate(FFFrameSync *fs)
Examine the frames in the filter's input and try to produce output.
Definition: framesync.c:334
int(* on_event)(struct FFFrameSync *fs)
Callback called when a frame event is ready.
Definition: framesync.h:172
int opt_repeatlast
Definition: framesync.h:205
OverlayCUDAContext.
static const AVOption overlay_cuda_options[]
enum AVPixelFormat in_format_main
AVFormatContext * ctx
Definition: movenc.c:48
AVRational time_base
Time base for the output events.
Definition: framesync.h:162
static int activate(AVFilterContext *ctx)
Definition: af_adeclick.c:622
FFmpeg internal API for CUDA.
int dummy
Definition: motion.c:64
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
HW acceleration through CUDA.
Definition: pixfmt.h:235
static av_cold int overlay_cuda_init(AVFilterContext *avctx)
Initialize overlay_cuda.
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
static int overlay_cuda_activate(AVFilterContext *avctx)
Activate overlay_cuda.
enum AVPixelFormat in_format_overlay
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:331
uint8_t * data
The data buffer.
Definition: buffer.h:89
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
static av_cold void overlay_cuda_uninit(AVFilterContext *avctx)
Uninitialize overlay_cuda.
This struct is allocated as AVHWDeviceContext.hwctx.
Describe the class of an AVClass context structure.
Definition: log.h:67
Filter definition.
Definition: avfilter.h:144
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
const char * name
Filter name.
Definition: avfilter.h:148
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
#define OFFSET(x)
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:275
int av_frame_make_writable(AVFrame *frame)
Ensure that the frame data is writable, avoiding data copy if possible.
Definition: frame.c:612
static const AVFilterPad overlay_cuda_outputs[]
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:314
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:141
int opt_eof_action
Definition: framesync.h:207
A reference to a data buffer.
Definition: buffer.h:81
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:93
#define DIV_UP(a, b)
static int overlay_cuda_blend(FFFrameSync *fs)
Perform blend overlay picture over main picture.
static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay)
Helper checks if we can process main and overlay pixel formats.
A list of supported formats for one end of a filter link.
Definition: formats.h:64
static int overlay_cuda_config_output(AVFilterLink *outlink)
Configure output.
An instance of a filter.
Definition: avfilter.h:338
AVCUDADeviceContext * hwctx
#define CHECK_CU(x)
int height
Definition: frame.h:358
formats
Definition: signature.h:48
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2465
internal API functions
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
AVFilter ff_vf_overlay_cuda
static int overlay_cuda_query_formats(AVFilterContext *avctx)
Query formats.
static enum AVPixelFormat supported_main_formats[]