x264码率控制（二）lookahead

103 阅读 0 评论 68 点赞

我是靠谱客的博主贪玩香菇，这篇文章主要介绍x264码率控制（二）lookahead，现在分享给大家，希望可以做个参考。

x264 码率控制，前面（一）中讲到，当前帧的qp值是通过rcc->last_satd计算出来的。那么last_satd是如何得到的呢？

last_satd = x264_rc_analyse_slice( h ); // 函数原型：int x264_rc_analyse_slice( x264_t *h );

函数中把每一行计算的row_satd 累加起来。

每一行中的row_satd如何得到的呢？

lookahead线程启动：

lookahead_thread() --> lookahead_thread_internal() // 注意：这里的internal不是指帧内（intra）lookahead，而是指lookahead的内部实现

-->lookahead_slicetype_decide()// lookahead内部决定了最终的编码帧类型

-->x264_slicetype_analyse()

/* When MB-tree rate control is enabled, run macroblock_tree() over the
 * lookahead frames; the frame count passed in is capped at i_keyint_max. */
if( h->param.rc.b_mb_tree )
macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.i_keyint_max), keyframe );

/* Walk i downwards towards idx. For each position, back up to the nearest
 * non-B frame (cur_nonb) and evaluate its cost with last_nonb used as both
 * the p1 and b arguments of slicetype_frame_cost().
 * NOTE(review): `a` is passed by address above and by value here, which
 * suggests the two snippets come from different functions -- confirm. */
while( i-- > idx )
{
cur_nonb = i;
while( IS_X264_TYPE_B( frames[cur_nonb]->i_type ) && cur_nonb > 0 )
cur_nonb--; // skip over B-frames
if( cur_nonb < idx )
break;
slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb );

}

//核心函数slicetype_frame_cost(h,a,frames,cur_nonb,last_nonb,last_nonb);

/* Estimate the cost of coding frames[b] with frames[p0] and frames[p1] as
 * references.
 *
 * If a non-negative estimate is already stored in
 * fenc->i_cost_est[b-p0][p1-b] (and, when a VBV buffer size is set, the row
 * SATDs for that combination have been filled in), the stored value is used.
 * Otherwise one slicetype_slice_cost() job per lookahead thread is queued on
 * h->lookaheadpool, each covering a contiguous band of macroblock rows, and
 * this function waits for all of them.
 *
 * NOTE(review): this is an abridged excerpt -- s, output_inter, output_intra
 * and dist_scale_factor are used without a visible declaration, and the end
 * of the function (including its return) is not shown. */
static int slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b )
{// The caller invokes this repeatedly from a loop, so first check whether this frame was already evaluated to avoid recomputing it
int i_score = 0;
int do_search[2];
const x264_weight_t *w = x264_weight_none;
x264_frame_t *fenc = frames[b];

/* Check whether we already evaluated this frame
* If we have tried this frame as P, then we have also tried
* the preceding frames as B. (is this still true?) */
/* Also check that we already calculated the row SATDs for the current frame. */
if( fenc->i_cost_est[b-p0][p1-b] >= 0 &&
(!h->param.rc.i_vbv_buffer_size || fenc->i_row_satds[b-p0][p1-b][0] != -1) )
{// already computed earlier
i_score = fenc->i_cost_est[b-p0][p1-b];// cached estimate; when already computed it is taken directly as i_score
}

else

{

// dispatch the cost computation to the lookahead threads

for( int i = 0; i < h->param.i_lookahead_threads; i++ )
{
x264_t *t = h->lookahead_thread[i];

/* FIXME move this somewhere else */
t->mb.i_me_method = h->mb.i_me_method;
t->mb.i_subpel_refine = h->mb.i_subpel_refine;
t->mb.b_chroma_me = h->mb.b_chroma_me;

/* Bundle the job arguments for worker i. */
s[i] = (x264_slicetype_slice_t){ t, a, frames, p0, p1, b, dist_scale_factor, do_search, w,
output_inter[i], output_intra[i] };

/* Split the macroblock rows evenly across the threads; adding
 * i_lookahead_threads/2 before dividing rounds each boundary to nearest. */
t->i_threadslice_start = ((h->mb.i_mb_height * i + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);
t->i_threadslice_end = ((h->mb.i_mb_height * (i+1) + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);

/* Each worker's output buffer holds NUM_INTS header ints plus one int per
 * row of its band; zero it and record the band height at index NUM_ROWS. */
int thread_height = t->i_threadslice_end - t->i_threadslice_start;
int thread_output_size = thread_height + NUM_INTS;
memset( output_inter[i], 0, thread_output_size * sizeof(int) );
memset( output_intra[i], 0, thread_output_size * sizeof(int) );
output_inter[i][NUM_ROWS] = output_intra[i][NUM_ROWS] = thread_height;

/* The next worker's buffer starts right after this one plus PAD_SIZE ints. */
output_inter[i+1] = output_inter[i] + thread_output_size + PAD_SIZE;
output_intra[i+1] = output_intra[i] + thread_output_size + PAD_SIZE;

x264_threadpool_run( h->lookaheadpool, (void*)slicetype_slice_cost, &s[i] );
}

for( int i = 0; i < h->param.i_lookahead_threads; i++ )
x264_threadpool_wait( h->lookaheadpool, &s[i] );

// wait for the multi-threaded computation to finish

}

// slice cost计算

/* Worker entry point: run slicetype_mb_cost() on every macroblock of the
 * row band [i_threadslice_start, i_threadslice_end) assigned to s->h.
 *
 * The current position is kept in h->mb.i_mb_x / h->mb.i_mb_y, which are
 * updated in place as the scan proceeds. */
static void slicetype_slice_cost( x264_slicetype_slice_t *s )
{
    x264_t *h = s->h;

    /* The edge mbs seem to reduce the predictive quality of the
     * whole frame's score, but are needed for a spatial distribution.
     * They are included when MB-tree or VBV is in use, or when the frame
     * is at most two macroblocks wide or high. */
    int include_edges = h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
                        h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;

    /* Scan bounds, clipped to this thread's band and (optionally) the
     * one-macroblock border. */
    int first_row = X264_MIN( h->i_threadslice_end - 1, h->mb.i_mb_height - 2 + include_edges );
    int last_row = X264_MAX( h->i_threadslice_start, 1 - include_edges );
    int first_col = h->mb.i_mb_width - 2 + include_edges;
    int last_col = 1 - include_edges;

    /* Lowres lookahead goes backwards because the MVs are used as predictors
     * in the main encode. This considerably improves MV prediction overall. */
    h->mb.i_mb_y = first_row;
    while( h->mb.i_mb_y >= last_row )
    {
        h->mb.i_mb_x = first_col;
        while( h->mb.i_mb_x >= last_col )
        {
            slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor,
                               s->do_search, s->w, s->output_inter, s->output_intra );
            h->mb.i_mb_x--;
        }
        h->mb.i_mb_y--;
    }
}

//mbcmp 系列的函数

/* Fill in the h->pixf comparison-function slots (mbcmp, intra_mbcmp_*,
 * fpelcmp*) with either the SATD/SA8D family or the SAD family, depending
 * on the lossless flag, the subpel refinement level and the ME method. */
static void mbcmp_init( x264_t *h )
{
    /* SATD-based metrics: only for non-lossless encodes with subpel refine > 1. */
    const int use_satd = !h->mb.b_lossless && h->param.analyse.i_subpel_refine > 1;
    /* The x9 intra helpers are left NULL for cpu-independent or lossless encodes. */
    const int allow_x9 = !( h->param.b_cpu_independent || h->mb.b_lossless );
    /* Full-pel comparison uses SATD only when, additionally, ME method is TESA. */
    const int fpel_satd = use_satd && h->param.analyse.i_me_method == X264_ME_TESA;

    if( use_satd )
    {
        memcpy( h->pixf.mbcmp, h->pixf.satd, sizeof(h->pixf.mbcmp) );
        memcpy( h->pixf.mbcmp_unaligned, h->pixf.satd, sizeof(h->pixf.mbcmp_unaligned) );
        h->pixf.intra_mbcmp_x3_16x16 = h->pixf.intra_satd_x3_16x16;
        h->pixf.intra_mbcmp_x3_8x16c = h->pixf.intra_satd_x3_8x16c;
        h->pixf.intra_mbcmp_x3_8x8c = h->pixf.intra_satd_x3_8x8c;
        h->pixf.intra_mbcmp_x3_8x8 = h->pixf.intra_sa8d_x3_8x8;
        h->pixf.intra_mbcmp_x3_4x4 = h->pixf.intra_satd_x3_4x4;
        h->pixf.intra_mbcmp_x9_4x4 = allow_x9 ? h->pixf.intra_satd_x9_4x4 : NULL;
        h->pixf.intra_mbcmp_x9_8x8 = allow_x9 ? h->pixf.intra_sa8d_x9_8x8 : NULL;
    }
    else
    {
        memcpy( h->pixf.mbcmp, h->pixf.sad_aligned, sizeof(h->pixf.mbcmp) );
        memcpy( h->pixf.mbcmp_unaligned, h->pixf.sad, sizeof(h->pixf.mbcmp_unaligned) );
        h->pixf.intra_mbcmp_x3_16x16 = h->pixf.intra_sad_x3_16x16;
        h->pixf.intra_mbcmp_x3_8x16c = h->pixf.intra_sad_x3_8x16c;
        h->pixf.intra_mbcmp_x3_8x8c = h->pixf.intra_sad_x3_8x8c;
        h->pixf.intra_mbcmp_x3_8x8 = h->pixf.intra_sad_x3_8x8;
        h->pixf.intra_mbcmp_x3_4x4 = h->pixf.intra_sad_x3_4x4;
        h->pixf.intra_mbcmp_x9_4x4 = allow_x9 ? h->pixf.intra_sad_x9_4x4 : NULL;
        h->pixf.intra_mbcmp_x9_8x8 = allow_x9 ? h->pixf.intra_sad_x9_8x8 : NULL;
    }

    if( fpel_satd )
    {
        memcpy( h->pixf.fpelcmp, h->pixf.satd, sizeof(h->pixf.fpelcmp) );
        memcpy( h->pixf.fpelcmp_x3, h->pixf.satd_x3, sizeof(h->pixf.fpelcmp_x3) );
        memcpy( h->pixf.fpelcmp_x4, h->pixf.satd_x4, sizeof(h->pixf.fpelcmp_x4) );
    }
    else
    {
        memcpy( h->pixf.fpelcmp, h->pixf.sad, sizeof(h->pixf.fpelcmp) );
        memcpy( h->pixf.fpelcmp_x3, h->pixf.sad_x3, sizeof(h->pixf.fpelcmp_x3) );
        memcpy( h->pixf.fpelcmp_x4, h->pixf.sad_x4, sizeof(h->pixf.fpelcmp_x4) );
    }
}

/* Per-macroblock cost evaluation for the macroblock at
 * (h->mb.i_mb_x, h->mb.i_mb_y) of frames[b], using frames[p0] and
 * frames[p1] as references.
 *
 * NOTE(review): only the local set-up is shown in this excerpt; the actual
 * cost computation has been left out by the article's author. */
static void slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b,
int dist_scale_factor, int do_search[2], const x264_weight_t *w,
int *output_inter, int *output_intra )
{// results end up in output_inter / output_intra; the computation is fairly involved and is left for a later walkthrough
x264_frame_t *fref0 = frames[p0];
x264_frame_t *fref1 = frames[p1];
x264_frame_t *fenc = frames[b];
   // fetch the frame being analysed and the two reference frames (p0 and p1)
   const int b_bidir = (b < p1);
const int i_mb_x = h->mb.i_mb_x;
const int i_mb_y = h->mb.i_mb_y;
const int i_mb_stride = h->mb.i_mb_width;
const int i_mb_xy = i_mb_x + i_mb_y * i_mb_stride;
const int i_stride = fenc->i_stride_lowres;
// pixel offset of this macroblock in the lowres plane (presumably 8x8 pixels per lowres MB -- confirm)
const int i_pel_offset = 8 * (i_mb_x + i_mb_y * i_stride);
// bipred weight: derived from dist_scale_factor when weighted bipred is on, otherwise fixed at 32
const int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
// pointers to this macroblock's stored lowres MVs and MV costs for list 0 (distance b-p0) and list 1 (distance p1-b)
int16_t (*fenc_mvs[2])[2] = { &fenc->lowres_mvs[0][b-p0-1][i_mb_xy], &fenc->lowres_mvs[1][p1-b-1][i_mb_xy] };
int (*fenc_costs[2]) = { &fenc->lowres_mv_costs[0][b-p0-1][i_mb_xy], &fenc->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
   // half-resolution (lowres) cost set-up
// true for interior macroblocks, or for every macroblock when the frame is at most 2 MBs wide or high
int b_frame_score_mb = (i_mb_x > 0 && i_mb_x < h->mb.i_mb_width - 1 &&
i_mb_y > 0 && i_mb_y < h->mb.i_mb_height - 1) ||
h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;

// cost computation (omitted in this excerpt)

}

//再看看x264_rc_analyse_slice() 如何读取结果的

/* Read back the cost estimate stored for frame b with references p0/p1;
 * it is expected to be valid (non-negative) at this point. */
cost = frames[b]->i_cost_est[b-p0][p1-b];
assert( cost >= 0 );

/* With MB-tree on and b_stat_read clear, recompute the cost through
 * slicetype_frame_cost_recalculate(); when b is non-zero and a VBV buffer
 * size is set, the (b, b, b) combination is recalculated too, with its
 * return value discarded. */
if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
{
cost = slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
if( b && h->param.rc.i_vbv_buffer_size )
slicetype_frame_cost_recalculate( h, frames, b, b, b );
}
/* In AQ, use the weighted score instead. */
else if( h->param.rc.i_aq_mode )
cost = frames[b]->i_cost_est_aq[b-p0][p1-b];

return cost;// return the cost computed in the lookahead

//下面这个开关（b_intra_refresh，周期性帧内刷新）一般不会开：它把I帧的帧内宏块按列分摊到各个P帧里面。这里就先不走读了，看了下，没太明白。（注意：在x264源码中，这段代码位于上面的 return cost; 之前。）

/* Intra-refresh adjustment: only when intra refresh and a VBV buffer size
 * are both set and the current frame is a P-frame. For every macroblock in
 * columns [i_pir_start_col, i_pir_end_col] of each row, the difference
 * between its scaled intra cost and its inter cost is added to that row's
 * SATD in h->fdec and to the frame cost. */
if( h->param.b_intra_refresh && h->param.rc.i_vbv_buffer_size && h->fenc->i_type == X264_TYPE_P )
{
int ip_factor = 256 * h->param.rc.f_ip_factor; /* fix8 */
for( int y = 0; y < h->mb.i_mb_height; y++ )
{
/* index of the first refresh-column macroblock in row y */
int mb_xy = y * h->mb.i_mb_stride + h->fdec->i_pir_start_col;
for( int x = h->fdec->i_pir_start_col; x <= h->fdec->i_pir_end_col; x++, mb_xy++ )
{
/* scale by ip_factor (8 fractional bits); +128 rounds before the shift */
int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor + 128) >> 8;
int inter_cost = h->fenc->lowres_costs[b-p0][p1-b][mb_xy] & LOWRES_COST_MASK;
int diff = intra_cost - inter_cost; // intra cost minus inter cost; why it is computed this way still needs investigating
/* with AQ enabled the difference is weighted by the per-MB inverse qscale factor (8 fractional bits) */
if( h->param.rc.i_aq_mode )
h->fdec->i_row_satd[y] += (diff * frames[b]->i_inv_qscale_factor[mb_xy] + 128) >> 8;
else
h->fdec->i_row_satd[y] += diff;
cost += diff;
}
}
}