Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6147 serge 1
/*
2
 * H.264 hardware encoding using nvidia nvenc
3
 * Copyright (c) 2014 Timo Rothenpieler 
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
 
22
#if defined(_WIN32)
23
#include 
24
#else
25
#include 
26
#endif
27
 
28
#include 
29
 
30
#include "libavutil/internal.h"
31
#include "libavutil/imgutils.h"
32
#include "libavutil/avassert.h"
33
#include "libavutil/opt.h"
34
#include "libavutil/mem.h"
35
#include "avcodec.h"
36
#include "internal.h"
37
#include "thread.h"
38
 
39
/* Calling convention applied to the CUDA entry-point typedefs below:
 * __stdcall on Windows, empty (platform default) everywhere else. */
#if defined(_WIN32)
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif

/* Platform wrappers for the dynamic loader:
 *   LOAD_FUNC(l, s)   looks up symbol s in library handle l,
 *   DL_CLOSE_FUNC(l)  releases library handle l. */
#if defined(_WIN32)
#define LOAD_FUNC(l, s) GetProcAddress(l, s)
#define DL_CLOSE_FUNC(l) FreeLibrary(l)
#else
#define LOAD_FUNC(l, s) dlsym(l, s)
#define DL_CLOSE_FUNC(l) dlclose(l)
#endif

/* Local declarations of the few CUDA driver types this file uses. */
typedef enum cudaError_enum {
    CUDA_SUCCESS = 0
} CUresult;
typedef int CUdevice;
typedef void* CUcontext;

/* Pointer types for the CUDA driver entry points that are resolved at
 * run time with LOAD_FUNC(). */
typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
67
 
68
/* Pointer type for the "NvEncodeAPICreateInstance" entry point, which is
 * looked up in the NVENC library and fills in an NV_ENCODE_API_FUNCTION_LIST. */
typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
69
 
70
typedef struct NvencInputSurface
71
{
72
    NV_ENC_INPUT_PTR input_surface;
73
    int width;
74
    int height;
75
 
76
    int lockCount;
77
 
78
    NV_ENC_BUFFER_FORMAT format;
79
} NvencInputSurface;
80
 
81
typedef struct NvencOutputSurface
82
{
83
    NV_ENC_OUTPUT_PTR output_surface;
84
    int size;
85
 
86
    NvencInputSurface* input_surface;
87
 
88
    int busy;
89
} NvencOutputSurface;
90
 
91
typedef struct NvencData
92
{
93
    union {
94
        int64_t timestamp;
95
        NvencOutputSurface *surface;
96
    } u;
97
} NvencData;
98
 
99
typedef struct NvencDataList
100
{
101
    NvencData* data;
102
 
103
    uint32_t pos;
104
    uint32_t count;
105
    uint32_t size;
106
} NvencDataList;
107
 
108
typedef struct NvencDynLoadFunctions
109
{
110
    PCUINIT cu_init;
111
    PCUDEVICEGETCOUNT cu_device_get_count;
112
    PCUDEVICEGET cu_device_get;
113
    PCUDEVICEGETNAME cu_device_get_name;
114
    PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability;
115
    PCUCTXCREATE cu_ctx_create;
116
    PCUCTXPOPCURRENT cu_ctx_pop_current;
117
    PCUCTXDESTROY cu_ctx_destroy;
118
 
119
    NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
120
    int nvenc_device_count;
121
    CUdevice nvenc_devices[16];
122
 
123
#if defined(_WIN32)
124
    HMODULE cuda_lib;
125
    HMODULE nvenc_lib;
126
#else
127
    void* cuda_lib;
128
    void* nvenc_lib;
129
#endif
130
} NvencDynLoadFunctions;
131
 
132
/* One entry of a string -> number lookup table. Tables are terminated by
 * an entry whose str is NULL. */
typedef struct NvencValuePair
{
    const char *str;
    uint32_t num;
} NvencValuePair;
137
 
138
typedef struct NvencContext
139
{
140
    AVClass *avclass;
141
 
142
    NvencDynLoadFunctions nvenc_dload_funcs;
143
 
144
    NV_ENC_INITIALIZE_PARAMS init_encode_params;
145
    NV_ENC_CONFIG encode_config;
146
    CUcontext cu_context;
147
 
148
    int max_surface_count;
149
    NvencInputSurface *input_surfaces;
150
    NvencOutputSurface *output_surfaces;
151
 
152
    NvencDataList output_surface_queue;
153
    NvencDataList output_surface_ready_queue;
154
    NvencDataList timestamp_list;
155
    int64_t last_dts;
156
 
157
    void *nvencoder;
158
 
159
    char *preset;
160
    char *profile;
161
    char *level;
162
    char *tier;
163
    int cbr;
164
    int twopass;
165
    int gpu;
166
    int buffer_delay;
167
} NvencContext;
168
 
169
static const NvencValuePair nvenc_h264_level_pairs[] = {
170
    { "auto", NV_ENC_LEVEL_AUTOSELECT },
171
    { "1"   , NV_ENC_LEVEL_H264_1     },
172
    { "1.0" , NV_ENC_LEVEL_H264_1     },
173
    { "1b"  , NV_ENC_LEVEL_H264_1b    },
174
    { "1.0b", NV_ENC_LEVEL_H264_1b    },
175
    { "1.1" , NV_ENC_LEVEL_H264_11    },
176
    { "1.2" , NV_ENC_LEVEL_H264_12    },
177
    { "1.3" , NV_ENC_LEVEL_H264_13    },
178
    { "2"   , NV_ENC_LEVEL_H264_2     },
179
    { "2.0" , NV_ENC_LEVEL_H264_2     },
180
    { "2.1" , NV_ENC_LEVEL_H264_21    },
181
    { "2.2" , NV_ENC_LEVEL_H264_22    },
182
    { "3"   , NV_ENC_LEVEL_H264_3     },
183
    { "3.0" , NV_ENC_LEVEL_H264_3     },
184
    { "3.1" , NV_ENC_LEVEL_H264_31    },
185
    { "3.2" , NV_ENC_LEVEL_H264_32    },
186
    { "4"   , NV_ENC_LEVEL_H264_4     },
187
    { "4.0" , NV_ENC_LEVEL_H264_4     },
188
    { "4.1" , NV_ENC_LEVEL_H264_41    },
189
    { "4.2" , NV_ENC_LEVEL_H264_42    },
190
    { "5"   , NV_ENC_LEVEL_H264_5     },
191
    { "5.0" , NV_ENC_LEVEL_H264_5     },
192
    { "5.1" , NV_ENC_LEVEL_H264_51    },
193
    { NULL }
194
};
195
 
196
static const NvencValuePair nvenc_hevc_level_pairs[] = {
197
    { "auto", NV_ENC_LEVEL_AUTOSELECT },
198
    { "1"   , NV_ENC_LEVEL_HEVC_1     },
199
    { "1.0" , NV_ENC_LEVEL_HEVC_1     },
200
    { "2"   , NV_ENC_LEVEL_HEVC_2     },
201
    { "2.0" , NV_ENC_LEVEL_HEVC_2     },
202
    { "2.1" , NV_ENC_LEVEL_HEVC_21    },
203
    { "3"   , NV_ENC_LEVEL_HEVC_3     },
204
    { "3.0" , NV_ENC_LEVEL_HEVC_3     },
205
    { "3.1" , NV_ENC_LEVEL_HEVC_31    },
206
    { "4"   , NV_ENC_LEVEL_HEVC_4     },
207
    { "4.0" , NV_ENC_LEVEL_HEVC_4     },
208
    { "4.1" , NV_ENC_LEVEL_HEVC_41    },
209
    { "5"   , NV_ENC_LEVEL_HEVC_5     },
210
    { "5.0" , NV_ENC_LEVEL_HEVC_5     },
211
    { "5.1" , NV_ENC_LEVEL_HEVC_51    },
212
    { "5.2" , NV_ENC_LEVEL_HEVC_52    },
213
    { "6"   , NV_ENC_LEVEL_HEVC_6     },
214
    { "6.0" , NV_ENC_LEVEL_HEVC_6     },
215
    { "6.1" , NV_ENC_LEVEL_HEVC_61    },
216
    { "6.2" , NV_ENC_LEVEL_HEVC_62    },
217
    { NULL }
218
};
219
 
220
/**
 * Look a string up in a NULL-terminated NvencValuePair table.
 *
 * @param avctx  codec context (not used by the lookup itself)
 * @param pair   first entry of the table; the last entry must have str == NULL
 * @param input  string to search for, compared exactly with strcmp()
 * @param output receives the num of the first matching entry; left untouched
 *               when nothing matches
 * @return 0 on a match, AVERROR(EINVAL) if input is NULL or not in the table
 */
static int input_string_to_uint32(AVCodecContext *avctx, const NvencValuePair *pair, const char *input, uint32_t *output)
{
    /* strcmp() on a NULL pointer is undefined; treat it as "no match" */
    if (!input)
        return AVERROR(EINVAL);

    for (; pair->str; ++pair) {
        if (!strcmp(input, pair->str)) {
            *output = pair->num;
            return 0;
        }
    }

    return AVERROR(EINVAL);
}
231
 
232
/**
 * Remove the oldest element from a queue.
 *
 * The queue must already own storage (at least one successful enqueue);
 * otherwise the assertions below fire.
 *
 * @param queue queue to read from
 * @return pointer to the dequeued element inside the queue's own storage,
 *         or NULL if the queue is empty. The slot may be overwritten or
 *         freed by a later enqueue, so copy the value out before enqueuing.
 */
static NvencData* data_queue_dequeue(NvencDataList* queue)
{
    uint32_t mask;
    uint32_t read_pos;

    av_assert0(queue);
    av_assert0(queue->size);
    av_assert0(queue->data);

    if (!queue->count)
        return NULL;

    /* size is always a power of two, so size - 1 works as a wrap-around mask */
    mask = queue->size - 1;
    /* the oldest element sits `count` slots behind the write position */
    read_pos = (queue->pos - queue->count) & mask;
    queue->count--;

    return &queue->data[read_pos];
}
251
 
252
/**
 * Append a copy of *data to the queue, allocating or doubling the ring
 * storage when needed.
 *
 * @param queue queue to append to; a zeroed structure is a valid empty queue
 * @param data  element to copy into the queue
 * @return 0 on success, AVERROR(ENOMEM) if storage could not be allocated
 *         (the queue is left unchanged in that case)
 */
static int data_queue_enqueue(NvencDataList* queue, NvencData *data)
{
    uint32_t mask;

    if (!queue->size) {
        /* first use: the capacity always has to be a power of two */
        queue->size = 4;
        queue->pos = 0;
        queue->count = 0;

        queue->data = av_malloc_array(queue->size, sizeof(*queue->data));

        if (!queue->data) {
            queue->size = 0;
            return AVERROR(ENOMEM);
        }
    }

    if (queue->count == queue->size) {
        NvencData *grown;
        uint32_t old_mask = queue->size - 1;
        uint32_t oldest = (queue->pos - queue->count) & old_mask;
        uint32_t i;

        /* doubling must not wrap the 32-bit capacity */
        if (queue->size > UINT32_MAX / 2)
            return AVERROR(ENOMEM);

        grown = av_malloc_array((size_t)queue->size * 2, sizeof(*grown));

        if (!grown)
            return AVERROR(ENOMEM);

        /* unroll the ring into the new storage, oldest element first */
        for (i = 0; i < queue->count; i++)
            grown[i] = queue->data[(oldest + i) & old_mask];

        av_free(queue->data);
        queue->data = grown;
        queue->pos = queue->count;
        queue->size *= 2;
    }

    mask = queue->size - 1;

    queue->data[queue->pos] = *data;
    queue->pos = (queue->pos + 1) & mask;
    queue->count++;

    return 0;
}
296
 
297
/**
 * Append an output-surface pointer to a queue.
 *
 * @return the result of data_queue_enqueue(): 0 on success or
 *         AVERROR(ENOMEM)
 */
static int out_surf_queue_enqueue(NvencDataList* queue, NvencOutputSurface* surface)
{
    NvencData data;
    data.u.surface = surface;

    return data_queue_enqueue(queue, &data);
}
304
 
305
/**
 * Remove the oldest output-surface pointer from a queue.
 *
 * @return the stored surface pointer, or NULL if the queue is empty
 */
static NvencOutputSurface* out_surf_queue_dequeue(NvencDataList* queue)
{
    NvencData* res = data_queue_dequeue(queue);

    if (!res)
        return NULL;

    return res->u.surface;
}
314
 
315
/**
 * Append a timestamp to a queue.
 *
 * @return the result of data_queue_enqueue(): 0 on success or
 *         AVERROR(ENOMEM)
 */
static int timestamp_queue_enqueue(NvencDataList* queue, int64_t timestamp)
{
    NvencData data;
    data.u.timestamp = timestamp;

    return data_queue_enqueue(queue, &data);
}
322
 
323
/**
 * Remove the oldest timestamp from a queue.
 *
 * @return the stored timestamp, or AV_NOPTS_VALUE if the queue is empty
 */
static int64_t timestamp_queue_dequeue(NvencDataList* queue)
{
    NvencData* res = data_queue_dequeue(queue);

    if (!res)
        return AV_NOPTS_VALUE;

    return res->u.timestamp;
}
332
 
333
/* Resolve symbol s from the CUDA library into f (cast to type t).
 * On failure, log the symbol name and jump to the caller's `error` label.
 * The expansion site must have `dl_fn`, `avctx` and an `error:` label in
 * scope. */
#define CHECK_LOAD_FUNC(t, f, s) \
do { \
    (f) = (t)LOAD_FUNC(dl_fn->cuda_lib, s); \
    if (!(f)) { \
        av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
        goto error; \
    } \
} while (0)
341
 
342
/**
 * Load the CUDA driver library and resolve the entry points stored in
 * NvencDynLoadFunctions. Does nothing if the library is already loaded.
 *
 * @return 1 on success (or if already loaded), 0 on failure. On failure
 *         the library handle is closed and reset to NULL.
 */
static av_cold int nvenc_dyload_cuda(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    if (dl_fn->cuda_lib)
        return 1;

#if defined(_WIN32)
    dl_fn->cuda_lib = LoadLibrary(TEXT("nvcuda.dll"));
#else
    dl_fn->cuda_lib = dlopen("libcuda.so", RTLD_LAZY);
    /* Systems without the development symlink may only provide the
     * versioned soname, so try that before giving up. */
    if (!dl_fn->cuda_lib)
        dl_fn->cuda_lib = dlopen("libcuda.so.1", RTLD_LAZY);
#endif

    if (!dl_fn->cuda_lib) {
        av_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
        goto error;
    }

    /* each of these jumps to `error` if the symbol is missing */
    CHECK_LOAD_FUNC(PCUINIT, dl_fn->cu_init, "cuInit");
    CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, dl_fn->cu_device_get_count, "cuDeviceGetCount");
    CHECK_LOAD_FUNC(PCUDEVICEGET, dl_fn->cu_device_get, "cuDeviceGet");
    CHECK_LOAD_FUNC(PCUDEVICEGETNAME, dl_fn->cu_device_get_name, "cuDeviceGetName");
    CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, dl_fn->cu_device_compute_capability, "cuDeviceComputeCapability");
    CHECK_LOAD_FUNC(PCUCTXCREATE, dl_fn->cu_ctx_create, "cuCtxCreate_v2");
    CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, dl_fn->cu_ctx_pop_current, "cuCtxPopCurrent_v2");
    CHECK_LOAD_FUNC(PCUCTXDESTROY, dl_fn->cu_ctx_destroy, "cuCtxDestroy_v2");

    return 1;

error:

    if (dl_fn->cuda_lib)
        DL_CLOSE_FUNC(dl_fn->cuda_lib);

    dl_fn->cuda_lib = NULL;

    return 0;
}
381
 
382
/**
 * Log a failed CUDA call.
 *
 * @param avctx logging context
 * @param err   result code returned by the CUDA call
 * @param func  text of the call, used in the log message
 * @return 1 if err is CUDA_SUCCESS, 0 otherwise (after logging)
 */
static av_cold int check_cuda_errors(AVCodecContext *avctx, CUresult err, const char *func)
{
    if (err != CUDA_SUCCESS) {
        /* %x expects an unsigned int; the enum's underlying type is not guaranteed */
        av_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, (unsigned)err);
        return 0;
    }
    return 1;
}
/* Statement wrapper around the function above: evaluates the CUDA call f and
 * jumps to the caller's `error` label if it failed. Needs `avctx` and an
 * `error:` label in scope. Wrapped in do/while(0) so it behaves as a single
 * statement next to if/else. */
#define check_cuda_errors(f) \
    do { \
        if (!check_cuda_errors(avctx, f, #f)) \
            goto error; \
    } while (0)
391
 
392
/**
 * Make sure CUDA is loaded and build the list of devices whose compute
 * capability is high enough for the selected codec.
 *
 * The required capability is 0x30 for H.264, or 0x52 for H.264 with
 * AV_PIX_FMT_YUV444P and for HEVC. Accepted devices are stored in
 * nvenc_devices; if the list is already populated it is reused as is.
 *
 * @return 1 if at least one usable device is known, 0 on any failure
 *         (nvenc_device_count is reset to 0 in that case)
 */
static av_cold int nvenc_check_cuda(AVCodecContext *avctx)
{
    int device_count = 0;
    CUdevice cu_device = 0;
    char gpu_name[128];
    int smminor = 0, smmajor = 0;
    int i, smver, target_smver;

    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        target_smver = avctx->pix_fmt == AV_PIX_FMT_YUV444P ? 0x52 : 0x30;
        break;
    case AV_CODEC_ID_H265:
        target_smver = 0x52;
        break;
    default:
        av_log(avctx, AV_LOG_FATAL, "nvenc: Unknown codec name\n");
        goto error;
    }

    if (!nvenc_dyload_cuda(avctx))
        return 0;

    /* device list already built by an earlier call */
    if (dl_fn->nvenc_device_count > 0)
        return 1;

    check_cuda_errors(dl_fn->cu_init(0));

    check_cuda_errors(dl_fn->cu_device_get_count(&device_count));

    if (!device_count) {
        av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
        goto error;
    }

    av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", device_count);

    dl_fn->nvenc_device_count = 0;

    for (i = 0; i < device_count; ++i) {
        check_cuda_errors(dl_fn->cu_device_get(&cu_device, i));
        check_cuda_errors(dl_fn->cu_device_get_name(gpu_name, sizeof(gpu_name), cu_device));
        check_cuda_errors(dl_fn->cu_device_compute_capability(&smmajor, &smminor, cu_device));

        /* pack major.minor into one comparable value, e.g. 5.2 -> 0x52 */
        smver = (smmajor << 4) | smminor;

        av_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= target_smver) ? "Available" : "Not Available");

        if (smver >= target_smver) {
            /* nvenc_devices is a fixed-size array; never write past its end */
            if (dl_fn->nvenc_device_count >= (int)(sizeof(dl_fn->nvenc_devices) / sizeof(dl_fn->nvenc_devices[0]))) {
                av_log(avctx, AV_LOG_WARNING, "Too many NVENC capable devices, ignoring the remaining ones\n");
                break;
            }
            dl_fn->nvenc_devices[dl_fn->nvenc_device_count++] = cu_device;
        }
    }

    if (!dl_fn->nvenc_device_count) {
        av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
        goto error;
    }

    return 1;

error:

    dl_fn->nvenc_device_count = 0;

    return 0;
}
460
 
461
/**
 * Load the NVENC library and fill dl_fn->nvenc_funcs through its
 * NvEncodeAPICreateInstance entry point. CUDA is checked first via
 * nvenc_check_cuda(). Does nothing more if the library is already loaded.
 *
 * @return 1 on success (or if already loaded), 0 on failure. On failure
 *         the NVENC library handle is closed and reset to NULL; the CUDA
 *         library loaded by nvenc_check_cuda() is not unloaded here.
 */
static av_cold int nvenc_dyload_nvenc(AVCodecContext *avctx)
{
    PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0;
    NVENCSTATUS nvstatus;

    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    if (!nvenc_check_cuda(avctx))
        return 0;

    if (dl_fn->nvenc_lib)
        return 1;

#if defined(_WIN32)
    /* the DLL name depends on the pointer width of this build */
    if (sizeof(void*) == 8) {
        dl_fn->nvenc_lib = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
    } else {
        dl_fn->nvenc_lib = LoadLibrary(TEXT("nvEncodeAPI.dll"));
    }
#else
    dl_fn->nvenc_lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY);
#endif

    if (!dl_fn->nvenc_lib) {
        av_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
        goto error;
    }

    nvEncodeAPICreateInstance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(dl_fn->nvenc_lib, "NvEncodeAPICreateInstance");

    if (!nvEncodeAPICreateInstance) {
        av_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
        goto error;
    }

    /* the version field must be set before the library fills the table */
    dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;

    nvstatus = nvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);

    if (nvstatus != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n");
        goto error;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");

    return 1;

error:
    if (dl_fn->nvenc_lib)
        DL_CLOSE_FUNC(dl_fn->nvenc_lib);

    dl_fn->nvenc_lib = NULL;

    return 0;
}
518
 
519
/**
 * Unload the NVENC and CUDA libraries and reset everything that was
 * derived from them: library handles, the device count and the CUDA
 * function pointers. Safe to call when either library is not loaded.
 */
static av_cold void nvenc_unload_nvenc(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    /* never hand a NULL handle to dlclose()/FreeLibrary() */
    if (dl_fn->nvenc_lib)
        DL_CLOSE_FUNC(dl_fn->nvenc_lib);
    dl_fn->nvenc_lib = NULL;

    dl_fn->nvenc_device_count = 0;

    if (dl_fn->cuda_lib)
        DL_CLOSE_FUNC(dl_fn->cuda_lib);
    dl_fn->cuda_lib = NULL;

    /* the pointers below referred into the library that was just closed */
    dl_fn->cu_init = NULL;
    dl_fn->cu_device_get_count = NULL;
    dl_fn->cu_device_get = NULL;
    dl_fn->cu_device_get_name = NULL;
    dl_fn->cu_device_compute_capability = NULL;
    dl_fn->cu_ctx_create = NULL;
    dl_fn->cu_ctx_pop_current = NULL;
    dl_fn->cu_ctx_destroy = NULL;

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
}
543
 
544
static av_cold int nvenc_encode_init(AVCodecContext *avctx)
545
{
546
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_session_params = { 0 };
547
    NV_ENC_PRESET_CONFIG preset_config = { 0 };
548
    CUcontext cu_context_curr;
549
    CUresult cu_res;
550
    GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
551
    GUID codec;
552
    NVENCSTATUS nv_status = NV_ENC_SUCCESS;
553
    int surfaceCount = 0;
554
    int i, num_mbs;
555
    int isLL = 0;
556
    int lossless = 0;
557
    int res = 0;
558
    int dw, dh;
559
 
560
    NvencContext *ctx = avctx->priv_data;
561
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
562
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
563
 
564
    if (!nvenc_dyload_nvenc(avctx))
565
        return AVERROR_EXTERNAL;
566
 
567
    ctx->last_dts = AV_NOPTS_VALUE;
568
 
569
    ctx->encode_config.version = NV_ENC_CONFIG_VER;
570
    ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
571
    preset_config.version = NV_ENC_PRESET_CONFIG_VER;
572
    preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
573
    encode_session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
574
    encode_session_params.apiVersion = NVENCAPI_VERSION;
575
 
576
    if (ctx->gpu >= dl_fn->nvenc_device_count) {
577
        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->gpu, dl_fn->nvenc_device_count);
578
        res = AVERROR(EINVAL);
579
        goto error;
580
    }
581
 
582
    ctx->cu_context = NULL;
583
    cu_res = dl_fn->cu_ctx_create(&ctx->cu_context, 0, dl_fn->nvenc_devices[ctx->gpu]);
584
 
585
    if (cu_res != CUDA_SUCCESS) {
586
        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
587
        res = AVERROR_EXTERNAL;
588
        goto error;
589
    }
590
 
591
    cu_res = dl_fn->cu_ctx_pop_current(&cu_context_curr);
592
 
593
    if (cu_res != CUDA_SUCCESS) {
594
        av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
595
        res = AVERROR_EXTERNAL;
596
        goto error;
597
    }
598
 
599
    encode_session_params.device = ctx->cu_context;
600
    encode_session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
601
 
602
    nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->nvencoder);
603
    if (nv_status != NV_ENC_SUCCESS) {
604
        ctx->nvencoder = NULL;
605
        av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (int)nv_status);
606
        res = AVERROR_EXTERNAL;
607
        goto error;
608
    }
609
 
610
    if (ctx->preset) {
611
        if (!strcmp(ctx->preset, "hp")) {
612
            encoder_preset = NV_ENC_PRESET_HP_GUID;
613
        } else if (!strcmp(ctx->preset, "hq")) {
614
            encoder_preset = NV_ENC_PRESET_HQ_GUID;
615
        } else if (!strcmp(ctx->preset, "bd")) {
616
            encoder_preset = NV_ENC_PRESET_BD_GUID;
617
        } else if (!strcmp(ctx->preset, "ll")) {
618
            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
619
            isLL = 1;
620
        } else if (!strcmp(ctx->preset, "llhp")) {
621
            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
622
            isLL = 1;
623
        } else if (!strcmp(ctx->preset, "llhq")) {
624
            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
625
            isLL = 1;
626
        } else if (!strcmp(ctx->preset, "lossless")) {
627
            encoder_preset = NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID;
628
            lossless = 1;
629
        } else if (!strcmp(ctx->preset, "losslesshp")) {
630
            encoder_preset = NV_ENC_PRESET_LOSSLESS_HP_GUID;
631
            lossless = 1;
632
        } else if (!strcmp(ctx->preset, "default")) {
633
            encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
634
        } else {
635
            av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown! Supported presets: hp, hq, bd, ll, llhp, llhq, lossless, losslesshp, default\n", ctx->preset);
636
            res = AVERROR(EINVAL);
637
            goto error;
638
        }
639
    }
640
 
641
    switch (avctx->codec->id) {
642
    case AV_CODEC_ID_H264:
643
        codec = NV_ENC_CODEC_H264_GUID;
644
        break;
645
    case AV_CODEC_ID_H265:
646
        codec = NV_ENC_CODEC_HEVC_GUID;
647
        break;
648
    default:
649
        av_log(avctx, AV_LOG_ERROR, "nvenc: Unknown codec name\n");
650
        res = AVERROR(EINVAL);
651
        goto error;
652
    }
653
 
654
    nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder, codec, encoder_preset, &preset_config);
655
    if (nv_status != NV_ENC_SUCCESS) {
656
        av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", (int)nv_status);
657
        res = AVERROR_EXTERNAL;
658
        goto error;
659
    }
660
 
661
    ctx->init_encode_params.encodeGUID = codec;
662
    ctx->init_encode_params.encodeHeight = avctx->height;
663
    ctx->init_encode_params.encodeWidth = avctx->width;
664
 
665
    if (avctx->sample_aspect_ratio.num && avctx->sample_aspect_ratio.den &&
666
        (avctx->sample_aspect_ratio.num != 1 || avctx->sample_aspect_ratio.num != 1)) {
667
        av_reduce(&dw, &dh,
668
                  avctx->width * avctx->sample_aspect_ratio.num,
669
                  avctx->height * avctx->sample_aspect_ratio.den,
670
                  1024 * 1024);
671
        ctx->init_encode_params.darHeight = dh;
672
        ctx->init_encode_params.darWidth = dw;
673
    } else {
674
        ctx->init_encode_params.darHeight = avctx->height;
675
        ctx->init_encode_params.darWidth = avctx->width;
676
    }
677
 
678
    // De-compensate for hardware, dubiously, trying to compensate for
679
    // playback at 704 pixel width.
680
    if (avctx->width == 720 &&
681
        (avctx->height == 480 || avctx->height == 576)) {
682
        av_reduce(&dw, &dh,
683
                  ctx->init_encode_params.darWidth * 44,
684
                  ctx->init_encode_params.darHeight * 45,
685
                  1024 * 1024);
686
        ctx->init_encode_params.darHeight = dh;
687
        ctx->init_encode_params.darWidth = dw;
688
    }
689
 
690
    ctx->init_encode_params.frameRateNum = avctx->time_base.den;
691
    ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
692
 
693
    num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
694
    ctx->max_surface_count = (num_mbs >= 8160) ? 32 : 48;
695
 
696
    if (ctx->buffer_delay >= ctx->max_surface_count)
697
        ctx->buffer_delay = ctx->max_surface_count - 1;
698
 
699
    ctx->init_encode_params.enableEncodeAsync = 0;
700
    ctx->init_encode_params.enablePTD = 1;
701
 
702
    ctx->init_encode_params.presetGUID = encoder_preset;
703
 
704
    ctx->init_encode_params.encodeConfig = &ctx->encode_config;
705
    memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
706
    ctx->encode_config.version = NV_ENC_CONFIG_VER;
707
 
708
    if (avctx->refs >= 0) {
709
        /* 0 means "let the hardware decide" */
710
        switch (avctx->codec->id) {
711
        case AV_CODEC_ID_H264:
712
            ctx->encode_config.encodeCodecConfig.h264Config.maxNumRefFrames = avctx->refs;
713
            break;
714
        case AV_CODEC_ID_H265:
715
            ctx->encode_config.encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB = avctx->refs;
716
            break;
717
        /* Earlier switch/case will return if unknown codec is passed. */
718
        }
719
    }
720
 
721
    if (avctx->gop_size > 0) {
722
        if (avctx->max_b_frames >= 0) {
723
            /* 0 is intra-only, 1 is I/P only, 2 is one B Frame, 3 two B frames, and so on. */
724
            ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
725
        }
726
 
727
        ctx->encode_config.gopLength = avctx->gop_size;
728
        switch (avctx->codec->id) {
729
        case AV_CODEC_ID_H264:
730
            ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
731
            break;
732
        case AV_CODEC_ID_H265:
733
            ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = avctx->gop_size;
734
            break;
735
        /* Earlier switch/case will return if unknown codec is passed. */
736
        }
737
    } else if (avctx->gop_size == 0) {
738
        ctx->encode_config.frameIntervalP = 0;
739
        ctx->encode_config.gopLength = 1;
740
        switch (avctx->codec->id) {
741
        case AV_CODEC_ID_H264:
742
            ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = 1;
743
            break;
744
        case AV_CODEC_ID_H265:
745
            ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = 1;
746
            break;
747
        /* Earlier switch/case will return if unknown codec is passed. */
748
        }
749
    }
750
 
751
    /* when there're b frames, set dts offset */
752
    if (ctx->encode_config.frameIntervalP >= 2)
753
        ctx->last_dts = -2;
754
 
755
    if (avctx->bit_rate > 0)
756
        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
757
 
758
    if (avctx->rc_max_rate > 0)
759
        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
760
 
761
    if (lossless) {
762
        if (avctx->codec->id == AV_CODEC_ID_H264)
763
            ctx->encode_config.encodeCodecConfig.h264Config.qpPrimeYZeroTransformBypassFlag = 1;
764
 
765
        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
766
        ctx->encode_config.rcParams.constQP.qpInterB = 0;
767
        ctx->encode_config.rcParams.constQP.qpInterP = 0;
768
        ctx->encode_config.rcParams.constQP.qpIntra = 0;
769
 
770
        avctx->qmin = -1;
771
        avctx->qmax = -1;
772
    } else if (ctx->cbr) {
773
        if (!ctx->twopass) {
774
            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
775
        } else if (ctx->twopass == 1 || isLL) {
776
            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
777
 
778
            if (avctx->codec->id == AV_CODEC_ID_H264) {
779
                ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
780
                ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
781
            }
782
        } else {
783
            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
784
        }
785
    } else if (avctx->global_quality > 0) {
786
        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
787
        ctx->encode_config.rcParams.constQP.qpInterB = avctx->global_quality;
788
        ctx->encode_config.rcParams.constQP.qpInterP = avctx->global_quality;
789
        ctx->encode_config.rcParams.constQP.qpIntra = avctx->global_quality;
790
 
791
        avctx->qmin = -1;
792
        avctx->qmax = -1;
793
    } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
794
        if (ctx->twopass == 1) {
795
            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
796
 
797
            if (avctx->codec->id == AV_CODEC_ID_H264) {
798
                ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
799
                ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
800
            }
801
        } else {
802
            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
803
        }
804
 
805
        ctx->encode_config.rcParams.enableMinQP = 1;
806
        ctx->encode_config.rcParams.enableMaxQP = 1;
807
 
808
        ctx->encode_config.rcParams.minQP.qpInterB = avctx->qmin;
809
        ctx->encode_config.rcParams.minQP.qpInterP = avctx->qmin;
810
        ctx->encode_config.rcParams.minQP.qpIntra = avctx->qmin;
811
 
812
        ctx->encode_config.rcParams.maxQP.qpInterB = avctx->qmax;
813
        ctx->encode_config.rcParams.maxQP.qpInterP = avctx->qmax;
814
        ctx->encode_config.rcParams.maxQP.qpIntra = avctx->qmax;
815
    }
816
 
817
    if (avctx->rc_buffer_size > 0)
818
        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
819
 
820
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
821
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
822
    } else {
823
        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
824
    }
825
 
826
    switch (avctx->codec->id) {
827
    case AV_CODEC_ID_H264:
828
        ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
829
        ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
830
 
831
        ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace;
832
        ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries;
833
        ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc;
834
 
835
        ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
836
 
837
        ctx->encode_config.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
838
        ctx->encode_config.encodeCodecConfig.h264Config.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
839
 
840
        if (!ctx->profile) {
841
            switch (avctx->profile) {
842
            case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
843
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
844
                break;
845
            case FF_PROFILE_H264_BASELINE:
846
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
847
                break;
848
            case FF_PROFILE_H264_MAIN:
849
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
850
                break;
851
            case FF_PROFILE_H264_HIGH:
852
            case FF_PROFILE_UNKNOWN:
853
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
854
                break;
855
            default:
856
                av_log(avctx, AV_LOG_WARNING, "Unsupported profile requested, falling back to high\n");
857
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
858
                break;
859
            }
860
        } else {
861
            if (!strcmp(ctx->profile, "high")) {
862
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
863
                avctx->profile = FF_PROFILE_H264_HIGH;
864
            } else if (!strcmp(ctx->profile, "main")) {
865
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
866
                avctx->profile = FF_PROFILE_H264_MAIN;
867
            } else if (!strcmp(ctx->profile, "baseline")) {
868
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
869
                avctx->profile = FF_PROFILE_H264_BASELINE;
870
            } else if (!strcmp(ctx->profile, "high444p")) {
871
                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
872
                avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
873
            } else {
874
                av_log(avctx, AV_LOG_FATAL, "Profile \"%s\" is unknown! Supported profiles: high, main, baseline\n", ctx->profile);
875
                res = AVERROR(EINVAL);
876
                goto error;
877
            }
878
        }
879
 
880
        ctx->encode_config.encodeCodecConfig.h264Config.chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;
881
 
882
        if (ctx->level) {
883
            res = input_string_to_uint32(avctx, nvenc_h264_level_pairs, ctx->level, &ctx->encode_config.encodeCodecConfig.h264Config.level);
884
 
885
            if (res) {
886
                av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 1b, 1.1, 1.2, 1.3, 2, 2.1, 2.2, 3, 3.1, 3.2, 4, 4.1, 4.2, 5, 5.1\n", ctx->level);
887
                goto error;
888
            }
889
        } else {
890
            ctx->encode_config.encodeCodecConfig.h264Config.level = NV_ENC_LEVEL_AUTOSELECT;
891
        }
892
 
893
        break;
894
    case AV_CODEC_ID_H265:
895
        ctx->encode_config.encodeCodecConfig.hevcConfig.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
896
        ctx->encode_config.encodeCodecConfig.hevcConfig.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
897
 
898
        /* No other profile is supported in the current SDK version 5 */
899
        ctx->encode_config.profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
900
        avctx->profile = FF_PROFILE_HEVC_MAIN;
901
 
902
        if (ctx->level) {
903
            res = input_string_to_uint32(avctx, nvenc_hevc_level_pairs, ctx->level, &ctx->encode_config.encodeCodecConfig.hevcConfig.level);
904
 
905
            if (res) {
906
                av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2\n", ctx->level);
907
                goto error;
908
            }
909
        } else {
910
            ctx->encode_config.encodeCodecConfig.hevcConfig.level = NV_ENC_LEVEL_AUTOSELECT;
911
        }
912
 
913
        if (ctx->tier) {
914
            if (!strcmp(ctx->tier, "main")) {
915
                ctx->encode_config.encodeCodecConfig.hevcConfig.tier = NV_ENC_TIER_HEVC_MAIN;
916
            } else if (!strcmp(ctx->tier, "high")) {
917
                ctx->encode_config.encodeCodecConfig.hevcConfig.tier = NV_ENC_TIER_HEVC_HIGH;
918
            } else {
919
                av_log(avctx, AV_LOG_FATAL, "Tier \"%s\" is unknown! Supported tiers: main, high\n", ctx->tier);
920
                res = AVERROR(EINVAL);
921
                goto error;
922
            }
923
        }
924
 
925
        break;
926
    /* Earlier switch/case will return if unknown codec is passed. */
927
    }
928
 
929
    nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
930
    if (nv_status != NV_ENC_SUCCESS) {
931
        av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", (int)nv_status);
932
        res = AVERROR_EXTERNAL;
933
        goto error;
934
    }
935
 
936
    ctx->input_surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->input_surfaces));
937
 
938
    if (!ctx->input_surfaces) {
939
        res = AVERROR(ENOMEM);
940
        goto error;
941
    }
942
 
943
    ctx->output_surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->output_surfaces));
944
 
945
    if (!ctx->output_surfaces) {
946
        res = AVERROR(ENOMEM);
947
        goto error;
948
    }
949
 
950
    for (surfaceCount = 0; surfaceCount < ctx->max_surface_count; ++surfaceCount) {
951
        NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
952
        NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
953
        allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
954
        allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
955
 
956
        allocSurf.width = (avctx->width + 31) & ~31;
957
        allocSurf.height = (avctx->height + 31) & ~31;
958
 
959
        allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
960
 
961
        switch (avctx->pix_fmt) {
962
        case AV_PIX_FMT_YUV420P:
963
            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
964
            break;
965
 
966
        case AV_PIX_FMT_NV12:
967
            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
968
            break;
969
 
970
        case AV_PIX_FMT_YUV444P:
971
            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
972
            break;
973
 
974
        default:
975
            av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
976
            res = AVERROR(EINVAL);
977
            goto error;
978
        }
979
 
980
        nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
981
        if (nv_status != NV_ENC_SUCCESS) {
982
            av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n");
983
            res = AVERROR_EXTERNAL;
984
            goto error;
985
        }
986
 
987
        ctx->input_surfaces[surfaceCount].lockCount = 0;
988
        ctx->input_surfaces[surfaceCount].input_surface = allocSurf.inputBuffer;
989
        ctx->input_surfaces[surfaceCount].format = allocSurf.bufferFmt;
990
        ctx->input_surfaces[surfaceCount].width = allocSurf.width;
991
        ctx->input_surfaces[surfaceCount].height = allocSurf.height;
992
 
993
        /* 1MB is large enough to hold most output frames. NVENC increases this automatically if it's not enough. */
994
        allocOut.size = 1024 * 1024;
995
 
996
        allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
997
 
998
        nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
999
        if (nv_status != NV_ENC_SUCCESS) {
1000
            av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n");
1001
            ctx->output_surfaces[surfaceCount++].output_surface = NULL;
1002
            res = AVERROR_EXTERNAL;
1003
            goto error;
1004
        }
1005
 
1006
        ctx->output_surfaces[surfaceCount].output_surface = allocOut.bitstreamBuffer;
1007
        ctx->output_surfaces[surfaceCount].size = allocOut.size;
1008
        ctx->output_surfaces[surfaceCount].busy = 0;
1009
    }
1010
 
1011
    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1012
        uint32_t outSize = 0;
1013
        char tmpHeader[256];
1014
        NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
1015
        payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
1016
 
1017
        payload.spsppsBuffer = tmpHeader;
1018
        payload.inBufferSize = sizeof(tmpHeader);
1019
        payload.outSPSPPSPayloadSize = &outSize;
1020
 
1021
        nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
1022
        if (nv_status != NV_ENC_SUCCESS) {
1023
            av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n");
1024
            goto error;
1025
        }
1026
 
1027
        avctx->extradata_size = outSize;
1028
        avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
1029
 
1030
        if (!avctx->extradata) {
1031
            res = AVERROR(ENOMEM);
1032
            goto error;
1033
        }
1034
 
1035
        memcpy(avctx->extradata, tmpHeader, outSize);
1036
    }
1037
 
1038
    if (ctx->encode_config.frameIntervalP > 1)
1039
        avctx->has_b_frames = 2;
1040
 
1041
    if (ctx->encode_config.rcParams.averageBitRate > 0)
1042
        avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;
1043
 
1044
    return 0;
1045
 
1046
error:
1047
 
1048
    for (i = 0; i < surfaceCount; ++i) {
1049
        p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->input_surfaces[i].input_surface);
1050
        if (ctx->output_surfaces[i].output_surface)
1051
            p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->output_surfaces[i].output_surface);
1052
    }
1053
 
1054
    if (ctx->nvencoder)
1055
        p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
1056
 
1057
    if (ctx->cu_context)
1058
        dl_fn->cu_ctx_destroy(ctx->cu_context);
1059
 
1060
    nvenc_unload_nvenc(avctx);
1061
 
1062
    ctx->nvencoder = NULL;
1063
    ctx->cu_context = NULL;
1064
 
1065
    return res;
1066
}
1067
 
1068
/**
 * Tear down everything owned by the encoder instance.
 *
 * Frees the timestamp and output-surface queues, destroys every NVENC
 * input and bitstream buffer, frees the surface bookkeeping arrays,
 * destroys the encoder session and the CUDA context, and finally calls
 * nvenc_unload_nvenc().
 *
 * @param avctx codec context; its priv_data is the NvencContext to clean up
 * @return always 0
 */
static av_cold int nvenc_encode_close(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    int i;

    av_freep(&ctx->timestamp_list.data);
    av_freep(&ctx->output_surface_ready_queue.data);
    av_freep(&ctx->output_surface_queue.data);

    for (i = 0; i < ctx->max_surface_count; ++i) {
        p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->input_surfaces[i].input_surface);
        p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->output_surfaces[i].output_surface);
    }
    ctx->max_surface_count = 0;

    /*
     * The surface arrays are av_malloc()ed during init; only their NVENC
     * buffers were destroyed above, so release the arrays themselves too.
     */
    av_freep(&ctx->input_surfaces);
    av_freep(&ctx->output_surfaces);

    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

    dl_fn->cu_ctx_destroy(ctx->cu_context);
    ctx->cu_context = NULL;

    nvenc_unload_nvenc(avctx);

    return 0;
}
1095
 
1096
/**
 * Copy one encoded picture from an NVENC bitstream buffer into a packet.
 *
 * Locks the bitstream buffer of tmpoutsurf, allocates pkt with the reported
 * size, copies the payload, unlocks the buffer, and then fills in the key
 * frame flag, picture type, pts and dts. One entry is taken from
 * ctx->timestamp_list for the dts on success; on every failure path one
 * entry is dequeued and discarded as well.
 *
 * @param avctx      codec context (priv_data is an NvencContext)
 * @param pkt        packet that receives the encoded data
 * @param tmpoutsurf output surface whose bitstream buffer is read
 * @return 0 on success, a negative AVERROR code on failure
 */
static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencOutputSurface *tmpoutsurf)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    uint32_t slice_mode_data;
    /* Must start out NULL: the error label frees it on every failure path,
     * including the unknown-codec one that is taken before any allocation. */
    uint32_t *slice_offsets = NULL;
    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
    NVENCSTATUS nv_status;
    int res = 0;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
        break;
    case AV_CODEC_ID_H265:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "nvenc: Unknown codec name\n");
        res = AVERROR(EINVAL);
        goto error;
    }

    slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));

    if (!slice_offsets) {
        /* Use the common error path so this failure is handled like the others */
        res = AVERROR(ENOMEM);
        goto error;
    }

    lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;

    lock_params.doNotWait = 0;
    lock_params.outputBitstream = tmpoutsurf->output_surface;
    lock_params.sliceOffsets = slice_offsets;

    nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
    if (nv_status != NV_ENC_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n");
        res = AVERROR_EXTERNAL;
        goto error;
    }

    res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);
    if (res) {
        /* The buffer is locked at this point; release it before bailing out */
        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
        goto error;
    }

    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);

    nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
    if (nv_status != NV_ENC_SUCCESS)
        av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, expect the gates of mordor to open\n");

    switch (lock_params.pictureType) {
    case NV_ENC_PIC_TYPE_IDR:
        pkt->flags |= AV_PKT_FLAG_KEY;
        /* fallthrough: an IDR picture is also reported as an I picture */
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    case NV_ENC_PIC_TYPE_I:
        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
        break;
    case NV_ENC_PIC_TYPE_P:
        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
        break;
    case NV_ENC_PIC_TYPE_B:
        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
        break;
    case NV_ENC_PIC_TYPE_BI:
        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
        av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
        res = AVERROR_EXTERNAL;
        goto error;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
    }

    pkt->pts = lock_params.outputTimeStamp;
    pkt->dts = timestamp_queue_dequeue(&ctx->timestamp_list);

    /* when there're b frame(s), set dts offset */
    if (ctx->encode_config.frameIntervalP >= 2)
        pkt->dts -= 1;

    /* dts may never exceed pts */
    if (pkt->dts > pkt->pts)
        pkt->dts = pkt->pts;

    /* keep dts strictly increasing from packet to packet */
    if (ctx->last_dts != AV_NOPTS_VALUE && pkt->dts <= ctx->last_dts)
        pkt->dts = ctx->last_dts + 1;

    ctx->last_dts = pkt->dts;

    av_free(slice_offsets);

    return 0;

error:

    av_free(slice_offsets);
    /* Discard the timestamp that would have been used for this packet */
    timestamp_queue_dequeue(&ctx->timestamp_list);

    return res;
}
1201
 
1202
static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1203
    const AVFrame *frame, int *got_packet)
1204
{
1205
    NVENCSTATUS nv_status;
1206
    NvencOutputSurface *tmpoutsurf;
1207
    int res, i = 0;
1208
 
1209
    NvencContext *ctx = avctx->priv_data;
1210
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1211
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1212
 
1213
    NV_ENC_PIC_PARAMS pic_params = { 0 };
1214
    pic_params.version = NV_ENC_PIC_PARAMS_VER;
1215
 
1216
    if (frame) {
1217
        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
1218
        NvencInputSurface *inSurf = NULL;
1219
 
1220
        for (i = 0; i < ctx->max_surface_count; ++i) {
1221
            if (!ctx->input_surfaces[i].lockCount) {
1222
                inSurf = &ctx->input_surfaces[i];
1223
                break;
1224
            }
1225
        }
1226
 
1227
        av_assert0(inSurf);
1228
 
1229
        inSurf->lockCount = 1;
1230
 
1231
        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1232
        lockBufferParams.inputBuffer = inSurf->input_surface;
1233
 
1234
        nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
1235
        if (nv_status != NV_ENC_SUCCESS) {
1236
            av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input buffer\n");
1237
            return 0;
1238
        }
1239
 
1240
        if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
1241
            uint8_t *buf = lockBufferParams.bufferDataPtr;
1242
 
1243
            av_image_copy_plane(buf, lockBufferParams.pitch,
1244
                frame->data[0], frame->linesize[0],
1245
                avctx->width, avctx->height);
1246
 
1247
            buf += inSurf->height * lockBufferParams.pitch;
1248
 
1249
            av_image_copy_plane(buf, lockBufferParams.pitch >> 1,
1250
                frame->data[2], frame->linesize[2],
1251
                avctx->width >> 1, avctx->height >> 1);
1252
 
1253
            buf += (inSurf->height * lockBufferParams.pitch) >> 2;
1254
 
1255
            av_image_copy_plane(buf, lockBufferParams.pitch >> 1,
1256
                frame->data[1], frame->linesize[1],
1257
                avctx->width >> 1, avctx->height >> 1);
1258
        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
1259
            uint8_t *buf = lockBufferParams.bufferDataPtr;
1260
 
1261
            av_image_copy_plane(buf, lockBufferParams.pitch,
1262
                frame->data[0], frame->linesize[0],
1263
                avctx->width, avctx->height);
1264
 
1265
            buf += inSurf->height * lockBufferParams.pitch;
1266
 
1267
            av_image_copy_plane(buf, lockBufferParams.pitch,
1268
                frame->data[1], frame->linesize[1],
1269
                avctx->width, avctx->height >> 1);
1270
        } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
1271
            uint8_t *buf = lockBufferParams.bufferDataPtr;
1272
 
1273
            av_image_copy_plane(buf, lockBufferParams.pitch,
1274
                frame->data[0], frame->linesize[0],
1275
                avctx->width, avctx->height);
1276
 
1277
            buf += inSurf->height * lockBufferParams.pitch;
1278
 
1279
            av_image_copy_plane(buf, lockBufferParams.pitch,
1280
                frame->data[1], frame->linesize[1],
1281
                avctx->width, avctx->height);
1282
 
1283
            buf += inSurf->height * lockBufferParams.pitch;
1284
 
1285
            av_image_copy_plane(buf, lockBufferParams.pitch,
1286
                frame->data[2], frame->linesize[2],
1287
                avctx->width, avctx->height);
1288
        } else {
1289
            av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
1290
            return AVERROR(EINVAL);
1291
        }
1292
 
1293
        nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, inSurf->input_surface);
1294
        if (nv_status != NV_ENC_SUCCESS) {
1295
            av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n");
1296
            return AVERROR_EXTERNAL;
1297
        }
1298
 
1299
        for (i = 0; i < ctx->max_surface_count; ++i)
1300
            if (!ctx->output_surfaces[i].busy)
1301
                break;
1302
 
1303
        if (i == ctx->max_surface_count) {
1304
            inSurf->lockCount = 0;
1305
            av_log(avctx, AV_LOG_FATAL, "No free output surface found!\n");
1306
            return AVERROR_EXTERNAL;
1307
        }
1308
 
1309
        ctx->output_surfaces[i].input_surface = inSurf;
1310
 
1311
        pic_params.inputBuffer = inSurf->input_surface;
1312
        pic_params.bufferFmt = inSurf->format;
1313
        pic_params.inputWidth = avctx->width;
1314
        pic_params.inputHeight = avctx->height;
1315
        pic_params.outputBitstream = ctx->output_surfaces[i].output_surface;
1316
        pic_params.completionEvent = 0;
1317
 
1318
        if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
1319
            if (frame->top_field_first) {
1320
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
1321
            } else {
1322
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
1323
            }
1324
        } else {
1325
            pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
1326
        }
1327
 
1328
        pic_params.encodePicFlags = 0;
1329
        pic_params.inputTimeStamp = frame->pts;
1330
        pic_params.inputDuration = 0;
1331
        switch (avctx->codec->id) {
1332
        case AV_CODEC_ID_H264:
1333
          pic_params.codecPicParams.h264PicParams.sliceMode = ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
1334
          pic_params.codecPicParams.h264PicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
1335
          break;
1336
        case AV_CODEC_ID_H265:
1337
          pic_params.codecPicParams.hevcPicParams.sliceMode = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
1338
          pic_params.codecPicParams.hevcPicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
1339
          break;
1340
        default:
1341
          av_log(avctx, AV_LOG_ERROR, "nvenc: Unknown codec name\n");
1342
          return AVERROR(EINVAL);
1343
        }
1344
 
1345
        res = timestamp_queue_enqueue(&ctx->timestamp_list, frame->pts);
1346
 
1347
        if (res)
1348
            return res;
1349
    } else {
1350
        pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
1351
    }
1352
 
1353
    nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
1354
 
1355
    if (frame && nv_status == NV_ENC_ERR_NEED_MORE_INPUT) {
1356
        res = out_surf_queue_enqueue(&ctx->output_surface_queue, &ctx->output_surfaces[i]);
1357
 
1358
        if (res)
1359
            return res;
1360
 
1361
        ctx->output_surfaces[i].busy = 1;
1362
    }
1363
 
1364
    if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1365
        av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n");
1366
        return AVERROR_EXTERNAL;
1367
    }
1368
 
1369
    if (nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
1370
        while (ctx->output_surface_queue.count) {
1371
            tmpoutsurf = out_surf_queue_dequeue(&ctx->output_surface_queue);
1372
            res = out_surf_queue_enqueue(&ctx->output_surface_ready_queue, tmpoutsurf);
1373
 
1374
            if (res)
1375
                return res;
1376
        }
1377
 
1378
        if (frame) {
1379
            res = out_surf_queue_enqueue(&ctx->output_surface_ready_queue, &ctx->output_surfaces[i]);
1380
 
1381
            if (res)
1382
                return res;
1383
 
1384
            ctx->output_surfaces[i].busy = 1;
1385
        }
1386
    }
1387
 
1388
    if (ctx->output_surface_ready_queue.count && (!frame || ctx->output_surface_ready_queue.count + ctx->output_surface_queue.count >= ctx->buffer_delay)) {
1389
        tmpoutsurf = out_surf_queue_dequeue(&ctx->output_surface_ready_queue);
1390
 
1391
        res = process_output_surface(avctx, pkt, tmpoutsurf);
1392
 
1393
        if (res)
1394
            return res;
1395
 
1396
        tmpoutsurf->busy = 0;
1397
        av_assert0(tmpoutsurf->input_surface->lockCount);
1398
        tmpoutsurf->input_surface->lockCount--;
1399
 
1400
        *got_packet = 1;
1401
    } else {
1402
        *got_packet = 0;
1403
    }
1404
 
1405
    return 0;
1406
}
1407
 
1408
static const enum AVPixelFormat pix_fmts_nvenc[] = {
1409
    AV_PIX_FMT_YUV420P,
1410
    AV_PIX_FMT_NV12,
1411
    AV_PIX_FMT_YUV444P,
1412
    AV_PIX_FMT_NONE
1413
};
1414
 
1415
#define OFFSET(x) offsetof(NvencContext, x)
1416
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1417
static const AVOption options[] = {
1418
    { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE },
1419
    { "profile", "Set the encoding profile (high, main or baseline)", OFFSET(profile), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
1420
    { "level", "Set the encoding level restriction (auto, 1.0, 1.0b, 1.1, 1.2, ..., 4.2, 5.0, 5.1)", OFFSET(level), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
1421
    { "tier", "Set the encoding tier (main or high)", OFFSET(tier), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
1422
    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
1423
    { "2pass", "Use 2pass cbr encoding mode", OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
1424
    { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
1425
    { "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
1426
    { NULL }
1427
};
1428
 
1429
static const AVCodecDefault nvenc_defaults[] = {
1430
    { "b", "0" },
1431
    { "qmin", "-1" },
1432
    { "qmax", "-1" },
1433
    { "qdiff", "-1" },
1434
    { "qblur", "-1" },
1435
    { "qcomp", "-1" },
1436
    { NULL },
1437
};
1438
 
1439
#if CONFIG_NVENC_ENCODER
/* AVClass exposing the shared option table under the name "nvenc" */
static const AVClass nvenc_class = {
    .class_name = "nvenc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* H.264 encoder registered as "nvenc" */
AVCodec ff_nvenc_encoder = {
    /* identity */
    .name           = "nvenc",
    .long_name      = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    /* private state and configuration */
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_class,
    .defaults       = nvenc_defaults,
    .pix_fmts       = pix_fmts_nvenc,
    .capabilities   = AV_CODEC_CAP_DELAY,
    /* entry points */
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif
1462
 
1463
/* Register the same H.264 encoder under the alias "nvenc_h264" */
1464
#if CONFIG_NVENC_H264_ENCODER
/* AVClass exposing the shared option table under the name "nvenc_h264" */
static const AVClass nvenc_h264_class = {
    .class_name = "nvenc_h264",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* H.264 encoder registered as "nvenc_h264" */
AVCodec ff_nvenc_h264_encoder = {
    /* identity */
    .name           = "nvenc_h264",
    .long_name      = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    /* private state and configuration */
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_h264_class,
    .defaults       = nvenc_defaults,
    .pix_fmts       = pix_fmts_nvenc,
    .capabilities   = AV_CODEC_CAP_DELAY,
    /* entry points */
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif
1487
 
1488
#if CONFIG_NVENC_HEVC_ENCODER
/* AVClass exposing the shared option table under the name "nvenc_hevc" */
static const AVClass nvenc_hevc_class = {
    .class_name = "nvenc_hevc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* HEVC encoder registered as "nvenc_hevc" */
AVCodec ff_nvenc_hevc_encoder = {
    /* identity */
    .name           = "nvenc_hevc",
    .long_name      = NULL_IF_CONFIG_SMALL("Nvidia NVENC hevc encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H265,
    /* private state and configuration */
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_hevc_class,
    .defaults       = nvenc_defaults,
    .pix_fmts       = pix_fmts_nvenc,
    .capabilities   = AV_CODEC_CAP_DELAY,
    /* entry points */
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif