18 ffmpeg如何使用libx264

80 阅读 0 评论 53 点赞

我是靠谱客的博主欢喜白羊，最近开发中收集的这篇文章主要介绍18 ffmpeg如何使用libx264，觉得挺不错的，现在分享给大家，希望可以做个参考。

概述

每个编解码的协议实现都需要有一个结构体：

/*
 * Codec descriptor for the libx264 H.264 encoder: names the codec and wires
 * up its init / encode / close callbacks.
 */
AVCodec ff_libx264_encoder = {
    .name           = "libx264",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    /* Size of the per-instance private state, an X264Context. */
    .priv_data_size = sizeof(X264Context),
    .init           = X264_init,
    .encode         = X264_frame,
    .close          = X264_close,
    .capabilities   = CODEC_CAP_DELAY,
    /* Listed input pixel formats; PIX_FMT_NONE is the last entry. */
    .pix_fmts       = (const enum PixelFormat[]) {
        PIX_FMT_YUV420P, PIX_FMT_YUVJ420P, PIX_FMT_NONE
    },
    .long_name      = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .priv_class     = &class,
};

这个结构体就相当于C++中的类：里面既有私有变量，也有虚函数。
通过id来区分不同的编解码器。
priv_data_size 是私有数据的大小；私有数据相当于一个私有变量，只有该实现自己使用，其内容是一个结构体（这里是X264Context），存储在avctx->priv_data中。

/*
 * Private state of the libx264 wrapper; reached through the codec context's
 * priv_data pointer.
 */
typedef struct X264Context {
    AVClass        *class;
    x264_param_t    params;    /* x264 parameters: defaults first, then values taken from avctx in X264_init */
    x264_t         *enc;       /* encoder handle returned by x264_encoder_open() */
    x264_picture_t  pic;       /* input picture, re-initialized for every X264_frame call */
    uint8_t        *sei;       /* SEI payload held back by encode_nals() */
    int             sei_size;  /* byte length of sei; 0 when nothing is pending */
    AVFrame         out_pic;   /* output frame info, exposed as avctx->coded_frame */
    /*
     * NOTE(review): the fields below are not referenced by the code shown
     * here; presumably user-supplied option values - confirm.
     */
    char           *preset;
    char           *tune;
    char           *profile;
    char           *level;
    int             fastfirstpass;
    char           *stats;
    char           *weightp;
    char           *x264opts;
} X264Context;

有三个最重要的函数，init，encode和close。每一个编解码协议都有不同的实现。从init函数开始：

init 函数

init函数的参数是 AVCodecContext *avctx，主要分为三部分：param的赋值、x264 open，以及最后的encode header。

首先把avctx中的参数赋值给X264Context的params。参数比较多，暂时用不到的参数可以先不必了解。

/*
 * Encoder init callback (opening part).
 *
 * Clears the pending-SEI size, loads libx264's default parameters into
 * x4->params, then starts overriding them from the codec context; only the
 * gop_size -> i_keyint_max assignment is shown here.
 */
static av_cold int X264_init(AVCodecContext *avctx)
{
X264Context *x4 = avctx->priv_data;
/* No SEI is pending until encode_nals() stores one. */
x4->sei_size = 0;
x264_param_default(&x4->params);
x4->params.i_keyint_max
= avctx->gop_size;
...

x264 open，赋值给X264Context.enc。


/* Open the x264 encoder with the prepared parameters; init fails with -1 when no handle comes back. */
x4->enc = x264_encoder_open(&x4->params);
if (!x4->enc)
return -1;

encode header，并把结果拷贝到extradata中。数据的拷贝是通过encode_nals函数完成的。


/* Publish the wrapper-owned output frame through the codec context. */
avctx->coded_frame = &x4->out_pic;
if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
    x264_nal_t *nal;
    int nnal, s, i;

    /* s is the total byte size of the header NALs; negative means failure. */
    s = x264_encoder_headers(x4->enc, &nal, &nnal);
    if (s < 0) {
        x264_encoder_close(x4->enc);
        x4->enc = NULL;
        return -1;
    }

    for (i = 0; i < nnal; i++) {
        /*
         * NOTE(review): offset 25 presumably skips the NAL/SEI header bytes
         * in front of the printable text - confirm against libx264.
         */
        if (nal[i].i_type == NAL_SEI)
            av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
    }

    avctx->extradata = av_malloc(s);
    if (!avctx->extradata) {
        /* Release the encoder here; x4->enc is cleared so X264_close stays safe. */
        x264_encoder_close(x4->enc);
        x4->enc = NULL;
        return -1;
    }
    /* skip_sei = 1: the SEI is kept out of extradata and stored in x4->sei instead. */
    avctx->extradata_size = encode_nals(avctx, avctx->extradata, s, nal, nnal, 1);
}
return 0;
}

编码出的数据由encode_nals进行封装。它分为两部分，重点在于对SEI的处理逻辑：

SEI是H264的补充增强信息。SEI的数据要放在内存块的最前面，所以首先拷贝SEI。

/*
 * Copy encoded NAL units into buf (prologue and pending-SEI part).
 *
 * ctx      - codec context whose priv_data is the X264Context
 * buf      - destination buffer
 * size     - capacity of buf in bytes
 * nals     - array of nnal NAL units
 * skip_sei - non-zero: SEI units are stored in x4->sei instead of buf
 *
 * This part returns -1 when the pending SEI does not fit into buf.
 */
static int encode_nals(AVCodecContext *ctx, uint8_t *buf, int size,
                       x264_nal_t *nals, int nnal, int skip_sei)
{
    X264Context *x4 = ctx->priv_data;
    uint8_t *p = buf;
    int i;

    /* Write the SEI as part of the first frame. */
    if (x4->sei_size > 0 && nnal > 0) {
        if (x4->sei_size > size) {
            av_log(ctx, AV_LOG_ERROR, "Error: nal buffer is too small\n");
            return -1;
        }
        memcpy(p, x4->sei, x4->sei_size);
        p += x4->sei_size;
        x4->sei_size = 0;
        /* The SEI is written only once, so its buffer can be released now. */
        av_freep(&x4->sei);
    }

当skip_sei为真且i_type为NAL_SEI时，数据存储在x4->sei中；否则放入输出内存buf中。


for (i = 0; i < nnal; i++) {
    /* Don't put the SEI in extradata. */
    if (skip_sei && nals[i].i_type == NAL_SEI) {
        /* Release any SEI kept earlier so the old buffer is not leaked. */
        av_freep(&x4->sei);
        x4->sei_size = 0;
        x4->sei = av_malloc(nals[i].i_payload);
        if (!x4->sei)
            return -1;
        /* Set the size only after the allocation succeeded. */
        x4->sei_size = nals[i].i_payload;
        memcpy(x4->sei, nals[i].p_payload, nals[i].i_payload);
        continue;
    }
    /* Refuse to write past the end of the caller's buffer. */
    if (nals[i].i_payload > size - (p - buf)) {
        av_log(ctx, AV_LOG_ERROR, "Error: nal buffer is too small\n");
        return -1;
    }
    memcpy(p, nals[i].p_payload, nals[i].i_payload);
    p += nals[i].i_payload;
}
/* Number of bytes written to buf. */
return p - buf;
}

encode函数

encode函数是编码的逻辑，使用了X264库的编码函数。X264库写得很好，使用起来很简单。encode函数的输入是原始帧数据（data）以及输出缓冲区buf和它的大小orig_bufsize，编码后的数据存储在buf中，并返回其长度。主要分成了三部分。

初始化x4->pic,保存原始数据。后调用了x264_encoder_reconfig来更新param。

/*
 * Encode callback (setup part).
 *
 * ctx          - codec context; priv_data holds the X264Context
 * buf          - output buffer for the encoded data
 * orig_bufsize - size of buf
 * data         - input AVFrame, or NULL (the frame-dependent setup below is
 *                then skipped)
 */
static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
int orig_bufsize, void *data)
{
X264Context *x4 = ctx->priv_data;
AVFrame *frame = data;
x264_nal_t *nal;
int nnal, i;
x264_picture_t pic_out;
int bufsize;
/* Reset the input picture, then describe it as 3-plane I420. */
x264_picture_init( &x4->pic );
x4->pic.img.i_csp
= X264_CSP_I420;
x4->pic.img.i_plane = 3;
if (frame) {
/* Point the x264 picture at the frame's planes; no pixel data is copied. */
for (i = 0; i < 3; i++) {
x4->pic.img.plane[i]
= frame->data[i];
x4->pic.img.i_stride[i] = frame->linesize[i];
}
x4->pic.i_pts
= frame->pts;
/* Map the requested picture type; anything other than I/P/B becomes X264_TYPE_AUTO. */
x4->pic.i_type =
frame->pict_type == AV_PICTURE_TYPE_I ? X264_TYPE_KEYFRAME :
frame->pict_type == AV_PICTURE_TYPE_P ? X264_TYPE_P :
frame->pict_type == AV_PICTURE_TYPE_B ? X264_TYPE_B :
X264_TYPE_AUTO;
/* Reconfigure the encoder when the frame's field order differs from the current parameters. */
if (x4->params.b_tff != frame->top_field_first) {
x4->params.b_tff = frame->top_field_first;
x264_encoder_reconfig(x4->enc, &x4->params);
}
/* Same for the sample aspect ratio stored in the codec context. */
if (x4->params.vui.i_sar_height != ctx->sample_aspect_ratio.den
|| x4->params.vui.i_sar_width != ctx->sample_aspect_ratio.num) {
x4->params.vui.i_sar_height = ctx->sample_aspect_ratio.den;
x4->params.vui.i_sar_width = ctx->sample_aspect_ratio.num;
x264_encoder_reconfig(x4->enc, &x4->params);
}
}

使用x264_encoder_encode对原始数据x4->pic进行编码，编码后的数据在nal数组中，NAL单元的个数为nnal，并通过encode_nals拷贝到buf中。pic_out保存编码后输出图像的一些辅助信息（pts、帧类型等）。


/*
 * Encode, then copy the produced NALs into buf. With no input frame and no
 * output yet, repeat for as long as x264 still reports delayed frames.
 */
do {
bufsize = orig_bufsize;
/* NULL is passed as the input picture when there is no frame. */
if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
return -1;
/* skip_sei = 0: every NAL goes to buf; bufsize becomes the byte count written. */
bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0);
if (bufsize < 0)
return -1;
} while (!bufsize && !frame && x264_encoder_delayed_frames(x4->enc));

x4->out_pic为AVFrame结构，通过pic_out给它赋值。


/* Copy the output picture's metadata into x4->out_pic. */
/* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */
x4->out_pic.pts = pic_out.i_pts;
/* NOTE(review): no default case - other i_type values leave pict_type unchanged. */
switch (pic_out.i_type) {
case X264_TYPE_IDR:
case X264_TYPE_I:
x4->out_pic.pict_type = AV_PICTURE_TYPE_I;
break;
case X264_TYPE_P:
x4->out_pic.pict_type = AV_PICTURE_TYPE_P;
break;
case X264_TYPE_B:
case X264_TYPE_BREF:
x4->out_pic.pict_type = AV_PICTURE_TYPE_B;
break;
}
x4->out_pic.key_frame = pic_out.b_keyframe;
/* Quality is only updated when data was actually produced. */
if (bufsize)
x4->out_pic.quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
/* Number of bytes written to buf (0 when nothing was output). */
return bufsize;
}

最后close函数是收尾工作，搞起来就很简单了。

/*
 * Close callback: releases the extradata, any SEI still pending, and the
 * x264 encoder handle. Always returns 0.
 *
 * Every released pointer is reset to NULL, so calling this again (or after a
 * failed init) cannot free the same memory or close the same handle twice.
 */
static av_cold int X264_close(AVCodecContext *avctx)
{
    X264Context *x4 = avctx->priv_data;

    av_freep(&avctx->extradata);
    av_freep(&x4->sei);
    x4->sei_size = 0;

    if (x4->enc) {
        x264_encoder_close(x4->enc);
        x4->enc = NULL;
    }
    return 0;
}

其他的主要是一些配置，这些配置可以先忽略，等我真正使用的时候再说吧。

使用

这三个函数的参数都离不开AVCodecContext，所以这三个函数的上层函数也是围绕着 AVCodecContext 来进行的。

avctx->codec->init 在 int attribute_align_arg avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVDictionary **options) 中调用，编解码器打开的函数中。
avctx->codec->encode 在 avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,const AVFrame *pict) 中调用，进行编码操作。
avctx->codec->close 在 int avcodec_close(AVCodecContext *avctx) 中调用。

理清了这个，ffmpeg的编码的重点部分就清晰了。