一 传递残差的作用
传递残差最终会直接累加到当前帧的 Cost 上,从而影响最终的码控。本文分析传递残差的计算过程。
二 代码详细分析
传递残差迭代过程
代码如下:
116void Lookahead::estimateCUPropagate(Lowres **frames, double averageDuration, int p0, int p1, int b, int referenced) { /* 1 帧序列 2 平均duration 3 前向帧 4 后向帧 5 当前帧 6 是否被参考了 */ uint16_t *refCosts[2] = { frames[p0]->propagateCost, frames[p1]->propagateCost }; int32_t distScaleFactor = (((b - p0) << 8) + ((p1 - p0) >> 1)) / (p1 - p0); int32_t bipredWeight = m_param->bEnableWeightedBiPred ? 64 - (distScaleFactor >> 2) : 32; int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight }; //计算双向权重 int listDist[2] = { b - p0, p1 - b }; memset(m_scratch, 0, m_8x8Width * sizeof(int)); uint16_t *propagateCost = frames[b]->propagateCost; //传递残差指针 s265_emms(); double fpsFactor = CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) / CLIP_DURATION(averageDuration); //duration factor,看下是否是不均匀的帧率,正常情况下是1 /* For non-referred frames the source costs are always zero, so just memset one row and re-use it. */ if (!referenced) //如果非参考 memset(frames[b]->propagateCost, 0, m_8x8Width * sizeof(uint16_t)); // int32_t strideInCU = m_8x8Width; //以cu为单位的行宽 for (uint16_t blocky = 0; blocky < m_8x8Height; blocky++) // { int cuIndex = blocky * strideInCU; //遍历每一行 if (m_param->rc.qgSize == 8) //如果qgSize == 8 primitives.propagateCost(m_scratch, propagateCost, frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex, frames[b]->invQscaleFactor8x8 + cuIndex, &fpsFactor, m_8x8Width); //计算传递残差 else primitives.propagateCost(m_scratch, propagateCost, frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex, frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width); // if (referenced) //如果是参考帧, propagateCost += m_8x8Width; //偏移一行,要不就是复用的 for (uint16_t blockx = 0; blockx < m_8x8Width; blockx++, cuIndex++) //遍历每一行的每个块 { int32_t propagate_amount = m_scratch[blockx]; /* Don't propagate for an intra block. */ if (propagate_amount > 0) //propagate_amount 传递次数 { /* Access width-2 bitfield. 
*/ int32_t lists_used = frames[b]->lowresCosts[b - p0][p1 - b][cuIndex] >> LOWRES_COST_SHIFT; // //最高两位存放 前向和后向使用情况, 一共16位的lowresCosts, 后14真的存放的是cost,前2位存放的是方向信息 lowresCosts ,画面在1/4分辨率时候的Cost , 右移位 14, /* Follow the MVs to the previous frame(s). */ for (uint16_t list = 0; list < 2; list++) //0, 1 前向/后向参考传递 { if ((lists_used >> list) & 1) //为1 表示有这个方向上的。 当list为0, { #define CLIP_ADD(s, x) (s) = (uint16_t)S265_MIN((s) + (x), (1 << 16) - 1) int32_t listamount = propagate_amount; /* Apply bipred weighting. */ if (lists_used == 3) //双向的 listamount = (listamount * bipredWeights[list] + 32) >> 6; //双向的需要调整,根据双向权重值 MV *mvs = frames[b]->lowresMvs[list][listDist[list]]; //拿到一个方向上的mv值 /* Early termination for simple case of mv 0. */ if (!mvs[cuIndex].word) //如果mv是0, 说明是 { CLIP_ADD(refCosts[list][cuIndex], listamount);//直接把当前块的cost增大一些,当然不能超过1 << 16 //因为这里没有运动向量的cost, 所以只需要加上传递残差 continue; } int32_t x = mvs[cuIndex].x; int32_t y = mvs[cuIndex].y; int32_t cux = (x >> 5) + blockx; int32_t cuy = (y >> 5) + blocky; int32_t idx0 = cux + cuy * strideInCU; //当前块位置 int32_t idx1 = idx0 + 1;//当前块前一个块 int32_t idx2 = idx0 + strideInCU;// 当前块下面一个块 int32_t idx3 = idx0 + strideInCU + 1; //当前块,前面下方一个块 /* D(当前块) idx1 idx2 idx3 */ x &= 31; y &= 31; int32_t idx0weight = (32 - y) * (32 - x); int32_t idx1weight = (32 - y) * x; int32_t idx2weight = y * (32 - x); int32_t idx3weight = y * x; /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't * be counted. 
*/ if (cux < m_8x8Width - 1 && cuy < m_8x8Height - 1 && cux >= 0 && cuy >= 0) { //给这些块,都加上传递残差的影响 ,在边界范围内的 CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10); CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10); CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10); CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10); } else /* Check offsets individually 出界的,需要逐个判断,要不然idx 下标访问越界*/ { if (cux < m_8x8Width && cuy < m_8x8Height && cux >= 0 && cuy >= 0) CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10); if (cux + 1 < m_8x8Width && cuy < m_8x8Height && cux + 1 >= 0 && cuy >= 0) CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10); if (cux < m_8x8Width && cuy + 1 < m_8x8Height && cux >= 0 && cuy + 1 >= 0) CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10); if (cux + 1 < m_8x8Width && cuy + 1 < m_8x8Height && cux + 1 >= 0 && cuy + 1 >= 0) CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10); } } } } } } if (m_param->rc.vbvBufferSize && m_param->lookaheadDepth && referenced) cuTreeFinish(frames[b], averageDuration, b == p1 ? b - p0 : 0); }
三 传递残差具体计算
代码如下:
/* Estimate the total amount of influence on future quality that could be had if we
 * were to improve the reference samples used to inter predict any given CU.
 *
 * dst         - output: propagation amount for each CU
 * propagateIn - cost already propagated onto each CU of the current frame
 * intraCosts  - intra cost of each CU
 * interCosts  - inter cost of each CU; only the bits selected by LOWRES_COST_MASK
 *               hold the cost value
 * invQscales  - per-CU scale factor applied to the intra cost
 * fpsFactor   - pointer to the duration factor; it is divided by 256 here
 *               (presumably to cancel the 8 fractional bits noted for invQscales)
 * len         - number of CUs to process
 *
 * A CU whose intra cost is not positive yields 0: the propagated fraction
 * (intra - inter) / intra would otherwise be 0 / 0, and converting the resulting
 * NaN to int is undefined behaviour. */
static void estimateCUPropagateCost(int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts,
                                    const int32_t* invQscales, const double* fpsFactor, int len)
{
    double fps = *fpsFactor / 256;
    for (int i = 0; i < len; i++)
    {
        int intraCost = intraCosts[i];
        if (intraCost <= 0)
        {
            /* Nothing can be propagated; also avoids the division by zero below. */
            dst[i] = 0;
            continue;
        }

        /* The inter cost is capped by the intra cost, so the numerator below is never negative. */
        int interCost = S265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);

        /* Multiply in double so a large cost times a large scale cannot overflow int. */
        double propagateIntra = (double)intraCost * invQscales[i];                 // Q16 x Q8.8 = Q24.8
        double propagateAmount = (double)propagateIn[i] + propagateIntra * fps;    // Q16.0 + Q24.8 x Q0.x = Q25.0
        double propagateNum = (double)(intraCost - interCost);                     // intra cost minus inter cost
        double propagateDenom = (double)intraCost;

        /* Pass on the fraction of the amount that inter prediction saves; round to nearest. */
        dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
    }
}
以上就是 cuTree 影响帧 Cost 的全过程。
最后
以上就是单身大雁最近收集整理的关于x265 传递残差计算的全部内容,更多相关x265内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复