x265 传递残差计算

89 阅读 0 评论 59 点赞

我是靠谱客的博主单身大雁，这篇文章主要介绍x265 传递残差计算，现在分享给大家，希望可以做个参考。

一、传递残差的作用

传递残差最终会直接累加到当前帧的 Cost 上，从而影响最终的码率控制。本文分析传递残差的计算过程。

二、代码详细分析

传递残差迭代过程

复制代码

/* Propagates the cost of frame b back onto the propagateCost planes of its two
 * references, frames[p0] (list 0) and frames[p1] (list 1), one row of blocks at a time.
 *
 * frames          - array of lowres frames, indexed by p0, p1 and b
 * averageDuration - average frame duration; used for fpsFactor and handed to cuTreeFinish
 * p0              - index of the forward (list 0) reference frame
 * p1              - index of the backward (list 1) reference frame
 * b               - index of the current frame
 * referenced      - non-zero when frame b is itself used as a reference */
void Lookahead::estimateCUPropagate(Lowres **frames, double averageDuration, int p0, int p1, int b, int referenced)
{
    uint16_t *refCosts[2] = { frames[p0]->propagateCost, frames[p1]->propagateCost };
    /* NOTE(review): divides by (p1 - p0); callers presumably guarantee p1 != p0 - confirm. */
    int32_t distScaleFactor = (((b - p0) << 8) + ((p1 - p0) >> 1)) / (p1 - p0);
    int32_t bipredWeight = m_param->bEnableWeightedBiPred ? 64 - (distScaleFactor >> 2) : 32;
    int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight }; /* list 0 / list 1 weights; they sum to 64 */
    int listDist[2] = { b - p0, p1 - b };

    memset(m_scratch, 0, m_8x8Width * sizeof(int));

    uint16_t *propagateCost = frames[b]->propagateCost; /* cost already propagated onto frame b */

    s265_emms();
    /* Duration factor: nominal frame duration over the average duration.
     * Presumably 1 when the frame rate is constant - confirm. */
    double fpsFactor = CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) / CLIP_DURATION(averageDuration);

    /* For non-referred frames the source costs are always zero, so just memset one row and re-use it. */
    if (!referenced)
        memset(frames[b]->propagateCost, 0, m_8x8Width * sizeof(uint16_t));

    int32_t strideInCU = m_8x8Width; /* row pitch, in blocks */
    for (uint16_t blocky = 0; blocky < m_8x8Height; blocky++)
    {
        int cuIndex = blocky * strideInCU; /* index of the first block of this row */

        /* Fill m_scratch with the propagate amount of every block in this row.
         * The two calls differ only in which inverse-qscale plane they read. */
        if (m_param->rc.qgSize == 8)
            primitives.propagateCost(m_scratch, propagateCost,
                                     frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
                                     frames[b]->invQscaleFactor8x8 + cuIndex, &fpsFactor, m_8x8Width);
        else
            primitives.propagateCost(m_scratch, propagateCost,
                                     frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
                                     frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width);

        /* A referenced frame has a real row of input costs per block row, so step to the
         * next one; otherwise the single zeroed row keeps being re-used. */
        if (referenced)
            propagateCost += m_8x8Width;

        for (uint16_t blockx = 0; blockx < m_8x8Width; blockx++, cuIndex++)
        {
            int32_t propagate_amount = m_scratch[blockx]; /* amount of cost to hand to the references */
            /* Don't propagate for an intra block. */
            if (propagate_amount > 0)
            {
                /* Access width-2 bitfield.
                 * The bits of a lowresCosts entry above LOWRES_COST_SHIFT flag which lists the
                 * block predicts from (bit 0 = list 0, bit 1 = list 1); the bits below hold the
                 * lowres cost itself. Per the original author's note the split is 2 flag bits
                 * over 14 cost bits - confirm against the LOWRES_COST_SHIFT definition. */
                int32_t lists_used = frames[b]->lowresCosts[b - p0][p1 - b][cuIndex] >> LOWRES_COST_SHIFT;
                /* Follow the MVs to the previous frame(s). */
                for (uint16_t list = 0; list < 2; list++)
                {
                    if ((lists_used >> list) & 1) /* this list is used by the block */
                    {
/* Saturating add into a uint16_t accumulator: the sum is capped at 65535. */
#define CLIP_ADD(s, x) (s) = (uint16_t)S265_MIN((s) + (x), (1 << 16) - 1)
                        int32_t listamount = propagate_amount;
                        /* Apply bipred weighting. */
                        if (lists_used == 3) /* bi-predicted: split the amount between both lists */
                            listamount = (listamount * bipredWeights[list] + 32) >> 6;

                        MV *mvs = frames[b]->lowresMvs[list][listDist[list]]; /* MVs of frame b towards this list */

                        /* Early termination for simple case of mv 0. */
                        if (!mvs[cuIndex].word)
                        {
                            /* Zero MV: the whole amount lands on the co-located block. */
                            CLIP_ADD(refCosts[list][cuIndex], listamount);
                            continue;
                        }

                        int32_t x = mvs[cuIndex].x;
                        int32_t y = mvs[cuIndex].y;
                        /* 32 MV units span one block: the high bits select the target block,
                         * the low 5 bits (taken below) are the fractional offset inside it. */
                        int32_t cux = (x >> 5) + blockx;
                        int32_t cuy = (y >> 5) + blocky;
                        int32_t idx0 = cux + cuy * strideInCU;  /* target block            */
                        int32_t idx1 = idx0 + 1;                /* its right neighbour     */
                        int32_t idx2 = idx0 + strideInCU;       /* the block below it      */
                        int32_t idx3 = idx0 + strideInCU + 1;   /* the block below-right   */
                        x &= 31;
                        y &= 31;
                        /* Bilinear weights over the four covered blocks; they sum to 1024,
                         * hence the "+ 512 >> 10" rounding below. */
                        int32_t idx0weight = (32 - y) * (32 - x);
                        int32_t idx1weight = (32 - y) * x;
                        int32_t idx2weight = y * (32 - x);
                        int32_t idx3weight = y * x;

                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
                         * be counted. */
                        if (cux < m_8x8Width - 1 && cuy < m_8x8Height - 1 && cux >= 0 && cuy >= 0)
                        {
                            /* All four target blocks are inside the frame. */
                            CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
                            CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
                            CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
                            CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
                        }
                        else /* Check offsets individually so that no out-of-frame index is written. */
                        {
                            if (cux < m_8x8Width && cuy < m_8x8Height && cux >= 0 && cuy >= 0)
                                CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
                            if (cux + 1 < m_8x8Width && cuy < m_8x8Height && cux + 1 >= 0 && cuy >= 0)
                                CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
                            if (cux < m_8x8Width && cuy + 1 < m_8x8Height && cux >= 0 && cuy + 1 >= 0)
                                CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
                            if (cux + 1 < m_8x8Width && cuy + 1 < m_8x8Height && cux + 1 >= 0 && cuy + 1 >= 0)
                                CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
                        }
                    }
                }
            }
        }
    }

    if (m_param->rc.vbvBufferSize && m_param->lookaheadDepth && referenced)
        cuTreeFinish(frames[b], averageDuration, b == p1 ? b - p0 : 0);
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/* Propagates the cost of frame b back onto the propagateCost planes of its two
 * references, frames[p0] (list 0) and frames[p1] (list 1), one row of blocks at a time.
 *
 * frames          - array of lowres frames, indexed by p0, p1 and b
 * averageDuration - average frame duration; used for fpsFactor and handed to cuTreeFinish
 * p0              - index of the forward (list 0) reference frame
 * p1              - index of the backward (list 1) reference frame
 * b               - index of the current frame
 * referenced      - non-zero when frame b is itself used as a reference */
void Lookahead::estimateCUPropagate(Lowres **frames, double averageDuration, int p0, int p1, int b, int referenced)
{
    uint16_t *refCosts[2] = { frames[p0]->propagateCost, frames[p1]->propagateCost };
    /* NOTE(review): divides by (p1 - p0); callers presumably guarantee p1 != p0 - confirm. */
    int32_t distScaleFactor = (((b - p0) << 8) + ((p1 - p0) >> 1)) / (p1 - p0);
    int32_t bipredWeight = m_param->bEnableWeightedBiPred ? 64 - (distScaleFactor >> 2) : 32;
    int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight }; /* list 0 / list 1 weights; they sum to 64 */
    int listDist[2] = { b - p0, p1 - b };

    memset(m_scratch, 0, m_8x8Width * sizeof(int));

    uint16_t *propagateCost = frames[b]->propagateCost; /* cost already propagated onto frame b */

    s265_emms();
    /* Duration factor: nominal frame duration over the average duration.
     * Presumably 1 when the frame rate is constant - confirm. */
    double fpsFactor = CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) / CLIP_DURATION(averageDuration);

    /* For non-referred frames the source costs are always zero, so just memset one row and re-use it. */
    if (!referenced)
        memset(frames[b]->propagateCost, 0, m_8x8Width * sizeof(uint16_t));

    int32_t strideInCU = m_8x8Width; /* row pitch, in blocks */
    for (uint16_t blocky = 0; blocky < m_8x8Height; blocky++)
    {
        int cuIndex = blocky * strideInCU; /* index of the first block of this row */

        /* Fill m_scratch with the propagate amount of every block in this row.
         * The two calls differ only in which inverse-qscale plane they read. */
        if (m_param->rc.qgSize == 8)
            primitives.propagateCost(m_scratch, propagateCost,
                                     frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
                                     frames[b]->invQscaleFactor8x8 + cuIndex, &fpsFactor, m_8x8Width);
        else
            primitives.propagateCost(m_scratch, propagateCost,
                                     frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
                                     frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width);

        /* A referenced frame has a real row of input costs per block row, so step to the
         * next one; otherwise the single zeroed row keeps being re-used. */
        if (referenced)
            propagateCost += m_8x8Width;

        for (uint16_t blockx = 0; blockx < m_8x8Width; blockx++, cuIndex++)
        {
            int32_t propagate_amount = m_scratch[blockx]; /* amount of cost to hand to the references */
            /* Don't propagate for an intra block. */
            if (propagate_amount > 0)
            {
                /* Access width-2 bitfield.
                 * The bits of a lowresCosts entry above LOWRES_COST_SHIFT flag which lists the
                 * block predicts from (bit 0 = list 0, bit 1 = list 1); the bits below hold the
                 * lowres cost itself. Per the original author's note the split is 2 flag bits
                 * over 14 cost bits - confirm against the LOWRES_COST_SHIFT definition. */
                int32_t lists_used = frames[b]->lowresCosts[b - p0][p1 - b][cuIndex] >> LOWRES_COST_SHIFT;
                /* Follow the MVs to the previous frame(s). */
                for (uint16_t list = 0; list < 2; list++)
                {
                    if ((lists_used >> list) & 1) /* this list is used by the block */
                    {
/* Saturating add into a uint16_t accumulator: the sum is capped at 65535. */
#define CLIP_ADD(s, x) (s) = (uint16_t)S265_MIN((s) + (x), (1 << 16) - 1)
                        int32_t listamount = propagate_amount;
                        /* Apply bipred weighting. */
                        if (lists_used == 3) /* bi-predicted: split the amount between both lists */
                            listamount = (listamount * bipredWeights[list] + 32) >> 6;

                        MV *mvs = frames[b]->lowresMvs[list][listDist[list]]; /* MVs of frame b towards this list */

                        /* Early termination for simple case of mv 0. */
                        if (!mvs[cuIndex].word)
                        {
                            /* Zero MV: the whole amount lands on the co-located block. */
                            CLIP_ADD(refCosts[list][cuIndex], listamount);
                            continue;
                        }

                        int32_t x = mvs[cuIndex].x;
                        int32_t y = mvs[cuIndex].y;
                        /* 32 MV units span one block: the high bits select the target block,
                         * the low 5 bits (taken below) are the fractional offset inside it. */
                        int32_t cux = (x >> 5) + blockx;
                        int32_t cuy = (y >> 5) + blocky;
                        int32_t idx0 = cux + cuy * strideInCU;  /* target block            */
                        int32_t idx1 = idx0 + 1;                /* its right neighbour     */
                        int32_t idx2 = idx0 + strideInCU;       /* the block below it      */
                        int32_t idx3 = idx0 + strideInCU + 1;   /* the block below-right   */
                        x &= 31;
                        y &= 31;
                        /* Bilinear weights over the four covered blocks; they sum to 1024,
                         * hence the "+ 512 >> 10" rounding below. */
                        int32_t idx0weight = (32 - y) * (32 - x);
                        int32_t idx1weight = (32 - y) * x;
                        int32_t idx2weight = y * (32 - x);
                        int32_t idx3weight = y * x;

                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
                         * be counted. */
                        if (cux < m_8x8Width - 1 && cuy < m_8x8Height - 1 && cux >= 0 && cuy >= 0)
                        {
                            /* All four target blocks are inside the frame. */
                            CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
                            CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
                            CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
                            CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
                        }
                        else /* Check offsets individually so that no out-of-frame index is written. */
                        {
                            if (cux < m_8x8Width && cuy < m_8x8Height && cux >= 0 && cuy >= 0)
                                CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
                            if (cux + 1 < m_8x8Width && cuy < m_8x8Height && cux + 1 >= 0 && cuy >= 0)
                                CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
                            if (cux < m_8x8Width && cuy + 1 < m_8x8Height && cux >= 0 && cuy + 1 >= 0)
                                CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
                            if (cux + 1 < m_8x8Width && cuy + 1 < m_8x8Height && cux + 1 >= 0 && cuy + 1 >= 0)
                                CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
                        }
                    }
                }
            }
        }
    }

    if (m_param->rc.vbvBufferSize && m_param->lookaheadDepth && referenced)
        cuTreeFinish(frames[b], averageDuration, b == p1 ? b - p0 : 0);
}

三、传递残差具体计算

复制代码

/* Estimate the total amount of influence on future quality that could be had if we
 * were to improve the reference samples used to inter predict any given CU.
 *
 * dst         - receives the propagate amount of each of the len blocks
 * propagateIn - cost already propagated onto each block of the current frame
 * intraCosts  - intra cost of each block
 * interCosts  - inter cost of each block; only the bits selected by LOWRES_COST_MASK
 *               are the cost itself
 * invQscales  - per-block scale applied to the intra cost
 * fpsFactor   - pointer to the duration factor; it is divided by 256 before use
 * len         - number of blocks to process */
static void estimateCUPropagateCost(
    int* dst,
    const uint16_t* propagateIn,
    const int32_t* intraCosts,
    const uint16_t* interCosts,
    const int32_t* invQscales,
    const double* fpsFactor,
    int len)
{
    double fps = *fpsFactor / 256; // range[0.01, 1.00]

    for (int i = 0; i < len; i++)
    {
        int intraCost = intraCosts[i];
        if (intraCost == 0)
        {
            /* The ratio below would be 0/0 (NaN), and converting NaN to int is undefined.
             * Nothing can be saved by inter prediction here, so nothing is propagated. */
            dst[i] = 0;
            continue;
        }

        /* The inter cost is capped at the intra cost, so the numerator below is never negative. */
        int interCost = S265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
        /* Widen before multiplying so the product cannot overflow int. */
        double propagateIntra = (double)intraCost * invQscales[i];              // Q16 x Q8.8 = Q24.8
        double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
        double propagateNum = (double)(intraCost - interCost);                  // Q32 - Q32 = Q33.0 (intra cost minus inter cost)
        double propagateDenom = (double)intraCost;                              // Q32
        /* amount * (intra - inter) / intra, rounded to nearest. */
        dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
    }
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* Estimate the total amount of influence on future quality that could be had if we
 * were to improve the reference samples used to inter predict any given CU.
 *
 * dst         - receives the propagate amount of each of the len blocks
 * propagateIn - cost already propagated onto each block of the current frame
 * intraCosts  - intra cost of each block
 * interCosts  - inter cost of each block; only the bits selected by LOWRES_COST_MASK
 *               are the cost itself
 * invQscales  - per-block scale applied to the intra cost
 * fpsFactor   - pointer to the duration factor; it is divided by 256 before use
 * len         - number of blocks to process */
static void estimateCUPropagateCost(
    int* dst,
    const uint16_t* propagateIn,
    const int32_t* intraCosts,
    const uint16_t* interCosts,
    const int32_t* invQscales,
    const double* fpsFactor,
    int len)
{
    double fps = *fpsFactor / 256; // range[0.01, 1.00]

    for (int i = 0; i < len; i++)
    {
        int intraCost = intraCosts[i];
        if (intraCost == 0)
        {
            /* The ratio below would be 0/0 (NaN), and converting NaN to int is undefined.
             * Nothing can be saved by inter prediction here, so nothing is propagated. */
            dst[i] = 0;
            continue;
        }

        /* The inter cost is capped at the intra cost, so the numerator below is never negative. */
        int interCost = S265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
        /* Widen before multiplying so the product cannot overflow int. */
        double propagateIntra = (double)intraCost * invQscales[i];              // Q16 x Q8.8 = Q24.8
        double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
        double propagateNum = (double)(intraCost - interCost);                  // Q32 - Q32 = Q33.0 (intra cost minus inter cost)
        double propagateDenom = (double)intraCost;                              // Q32
        /* amount * (intra - inter) / intra, rounded to nearest. */
        dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
    }
}