H.264中采用的是整数DCT变换,在实现的时候,该变换和量化又杂糅在一起,那么这些错综复杂的关系究竟是怎样纠缠的呢?在参考H.264乐园论坛会员cs1860wd的帖子和H.264 and MPEG-4 VIDEO COMPRESSION(第一版)这本书后,基于帖子和书上的讲解,给出相应的实现代码,并验证代码的正确性.
还是以foreman视频第一帧第一个宏块第一个4*4块为例. 下面给出像素值:
====================== Y Data ======================
+—————-+—————-+—————-+—————-+
| 43,216,254,249,|251,254,254,253,|251,252,254,254,|254,254,254,253,|
| 49,198,193,211,|228,205,213,185,|211,207,186,248,|198,203,208,183,|
| 48,194,177,171,|197,173,185,136,|191,195,138,179,|142,176,177,135,|
| 46,214,225,169,|177,189,198,160,|203,208,177,165,|173,196,191,156,|
+—————-+—————-+—————-+—————-+
| 41,185,208,180,|203,228,226,200,|214,226,225,227,|228,225,224,210,|
| 31,130,173,178,|215,230,221,212,|220,229,227,228,|229,227,226,226,|
| 29,119,194,216,|211,213,219,222,|225,223,220,219,|218,218,218,218,|
| 25,126,219,224,|217,224,227,227,|227,226,225,224,|220,220,221,222,|
+—————-+—————-+—————-+—————-+
| 26,131,215,223,|226,225,225,225,|225,226,223,219,|221,221,219,220,|
| 30,136,216,226,|223,224,225,225,|224,221,217,221,|222,219,220,226,|
| 30,136,216,227,|224,224,225,223,|221,218,221,216,|211,224,224,211,|
| 29,135,217,225,|222,221,222,222,|221,209,181,155,|186,210,186,164,|
+—————-+—————-+—————-+—————-+
| 29,134,216,224,|226,230,230,227,|206,177,146,113,|149,162,147,150,|
| 29,135,219,231,|225,201,190,185,|163,144,153,140,|127,143,165,184,|
| 30,139,210,192,|165,142,134,133,|143,141,129,138,|150,178,201,207,|
| 30,125,166,145,|144,154,132,111,|118,161,175,180,|204,214,213,209,|
+—————-+—————-+—————-+—————-+
该块的预测值为128(16个位置都是128),之前分析过,在JM8.6中,这一块在编码端和解码端的QDCT均为:
9 -12 -11 -5
3 -3 1 0
3 -1 -2 1
0 0 0 0
且用H.264visa从码流中得到的DCT系数为:(注意解码端的DCT与编码端的DCT必然不同)
====================== Y Data ======================
+————————+————————+————————+————————+
| 2304,-3840,-2816,-1600,| 768, 640, -256, 640,| 1280, 320, 256, -640,| 768, -320, -768, 0,|
| 960,-1200, 320, 0,| 0, 0, 320, 0,| -640, -800, 0, 0,| 960, -800, 320, 0,|
| 768, -320, -512, 320,| 512, -640, 0, -320,| -768, 320, -512, 320,| 768, 0, 256, 0,|
| 0, 0, 0, 0,| 0, 0, 0, 0,| 0, 400, 0, 0,| -320, 0, 0, 0,|
+————————+————————+————————+————————+
|-1024,-5120,-1792, -640,| 2560, -640, -256, 0,| 512, 0, 0, 0,| 0, 0, 0, 0,|
| 0, 1200, -640, -400,| -320, 400, 0, 0,| 640, 0, 0, 0,| 320, 0, 0, 0,|
| 512, 0, -256, 0,| 0, 0, 0, 0,| 0, 0, 0, 0,| 0, 0, 0, 0,|
| 320, 0, 0, 0,| -320, 0, 0, 0,| 0, 0, 0, 0,| 0, 0, 0, 0,|
+————————+————————+————————+————————+
| 0, 320, 0, 0,| 0, 0, 0, 0,| -768, 640, 0, 0,| 512, 0, -256, 0,|
| 0, 0, 0, 0,| 0, 0, 0, 0,| 640, -800, 0, 0,| -640, -400, 320, 0,|
| 0, 0, 0, 0,| 0, 0, 0, 0,| -256, 320, 0, 0,| 0, 320, 0, 0,|
| 0, 0, 0, 0,| 0, 0, 0, 0,| 320, -400, 0, 0,| -320, 0, 0, 0,|
+————————+————————+————————+————————+
| -512, 640, -256, 0,| 0, 0, 0, 0,| 1024, -320, -256, 0,| 1024, -320, 0, 0,|
| 960,-1200, 320, 0,| 960, -800, 320, 0,|-1280, 800, 0, 400,| -640, 400, 0, 0,|
| -512, 320, 0, 0,| 0, 0, -256, 0,| 0, 0, -256, 0,| 512, 640, 0, 0,|
| 0, 0, 0, 0,| -320, 0, 0, 0,| -640, 800, 0, 0,| 0, 0, 0, 0,|
+————————+————————+————————+————————+
下面根据变换,量化,反量化和反变换公式给出如下程序,看看是否与上述数据一致.(需要特别指出的是:JM8.6中并不是这么实现的,但本质是相同的,最后结果也一样.)
-
#include <iostream>
-
#include <cmath>
-
#define BLOCK_SIZE 4
-
using namespace std;
-
-
int QP =
28;
// 量化参数
-
-
//原始的YUV矩阵
-
int orgYUV[BLOCK_SIZE][BLOCK_SIZE] =
-
{
-
43,
216,
254,
249,
-
49,
198,
193,
211,
-
48,
194,
177,
171,
-
46,
214,
225,
169
-
};
-
-
//预测值矩阵
-
int predYUV[BLOCK_SIZE][BLOCK_SIZE] =
-
{
-
128,
128,
128,
128,
-
128,
128,
128,
128,
-
128,
128,
128,
128,
-
128,
128,
128,
128
-
};
-
-
int D[BLOCK_SIZE][BLOCK_SIZE];
//中间矩阵
-
int Di[BLOCK_SIZE][BLOCK_SIZE];
//中间矩阵
-
-
int W[BLOCK_SIZE][BLOCK_SIZE];
//核矩阵
-
int Z[BLOCK_SIZE][BLOCK_SIZE];
//QDCT矩阵
-
int Wi[BLOCK_SIZE][BLOCK_SIZE];
//Wi矩阵
-
-
int Xi[BLOCK_SIZE][BLOCK_SIZE];
//解码的残差矩阵
-
-
//Cf矩阵
-
int Cf[BLOCK_SIZE][BLOCK_SIZE]=
-
{
-
1,
1,
1,
1,
-
2,
1,
-1,
-2,
-
1,
-1,
-1,
1,
-
1,
-2,
2,
-1
-
};
-
-
//Ci矩阵
-
int Ci[BLOCK_SIZE][BLOCK_SIZE]=
-
{
-
2,
2,
2,
1,
-
2,
1,
-2,
-2,
-
2,
-1,
-2,
2,
-
2,
-2,
2,
-1
-
};
-
-
//MF矩阵
-
int MF[
6][
3]=
-
{
-
13107,
5243,
8066,
-
11916,
4660,
7490,
-
10082,
4194,
6554,
-
9362,
3647,
5825,
-
8192,
3355,
5243,
-
7282,
2893,
4559
-
};
-
-
//Qstep矩阵
-
int V[
6][
3]=
-
{
-
10,
16,
13,
-
11,
18,
14,
-
13,
20,
16,
-
14,
23,
18,
-
16,
25,
20,
-
18,
29,
23
-
};
-
-
//矩阵转置
-
void matrixTransform(
int a[][BLOCK_SIZE])
-
{
-
int i, j, tmp;
-
for(i =
0; i < BLOCK_SIZE; i++)
-
{
-
for(j =
0; j < BLOCK_SIZE; j++)
-
{
-
if(i < j)
-
{
-
tmp = a[i][j];
-
a[i][j] = a[j][i];
-
a[j][i] = tmp;
-
}
-
}
-
}
-
}
-
-
//矩阵求差
-
void matrixSubtract(
int a[][BLOCK_SIZE],
int b[][BLOCK_SIZE])
-
{
-
int i, j;
-
for(i =
0;i < BLOCK_SIZE; i++)
-
{
-
for(j =
0; j < BLOCK_SIZE; j++)
-
{
-
a[i][j] -= b[i][j];
-
}
-
}
-
}
-
-
//矩阵求积
-
void matrixMultiply(
int a[][BLOCK_SIZE],
int b[][BLOCK_SIZE],
int c[][BLOCK_SIZE])
-
{
-
int i, j, k;
-
for(i =
0; i < BLOCK_SIZE; i++)
-
{
-
for(j =
0; j < BLOCK_SIZE; j++)
-
{
-
c[i][j] =
0;
-
for(k =
0; k < BLOCK_SIZE; k++)
-
{
-
c[i][j] += a[i][k] * b[k][j];
-
}
-
}
-
}
-
}
-
-
//矩阵显示
-
void matrixShow(
int a[][BLOCK_SIZE])
-
{
-
int i, j;
-
cout <<
"*****************************" << endl;
-
for(i =
0; i < BLOCK_SIZE; i++)
-
{
-
for(j =
0; j < BLOCK_SIZE; j++)
-
{
-
cout << a[i][j] <<
"\t";
-
}
-
cout << endl;
-
}
-
-
cout <<
"*****************************" << endl << endl;
-
}
-
-
//求QDCT
-
void quantizeDCT(
int W[][BLOCK_SIZE])
-
{
-
// QP决定了qbits和f, QP和位置(i, j)共同决定了mf
-
int qbits =
15 + floor(QP /
6);
-
int f = (
int)( pow(
2.0, qbits) /
3 );
-
int mf;
-
-
int i, j, k;
-
for(i =
0; i < BLOCK_SIZE; i++)
-
{
-
for(j =
0; j < BLOCK_SIZE; j++)
-
{
-
//以下均依公式实现
-
-
//(0, 0), (2, 0), (0, 2), (2, 2) mf为 MF[QP % 6][0]
-
//(1, 1), (3, 1), (1, 3), (3, 3) mf为 MF[QP % 6][1];
-
// other positions mf为 MF[QP % 6][2];
-
-
if((
0 == i ||
2 == i) && (
0 == j ||
2 == j))
-
k =
0;
-
else
if((
1 == i ||
3 == i) && (
1 == j ||
3 == j))
-
k =
1;
-
else
-
k =
2;
-
-
mf = MF[QP %
6][k];
-
-
Z[i][j] = ( abs(W[i][j]) * mf + f ) >> qbits;
-
-
if(W[i][j] <
0)
-
Z[i][j] = -Z[i][j];
-
}
-
-
}
-
}
-
-
//求Wi(即解码端的DCT) (Z为QDCT)
-
void reverseQuantize(
int Z[][BLOCK_SIZE])
-
{
-
int t = floor(QP /
6);
-
int f = (
int)pow(
2, t);
-
int v;
-
-
int i, j, k;
-
for(i =
0; i < BLOCK_SIZE; i++)
-
{
-
for(j =
0; j < BLOCK_SIZE; j++)
-
{
-
//以下均依公式实现
-
-
if((
0 == i ||
2 == i) && (
0 == j ||
2 == j))
-
k =
0;
-
else
if((
1 == i ||
3 == i) && (
1 == j ||
3 == j))
-
k =
1;
-
else
-
k =
2;
-
-
v = V[QP %
6 ][k];
-
Wi[i][j] = Z[i][j] * v * f;
-
}
-
}
-
}
-
-
int main()
-
{
-
matrixSubtract(orgYUV, predYUV);
-
cout <<
"Residual matrix is" << endl;
-
matrixShow(orgYUV);
-
-
//此时orgYUV变为残差矩阵
-
matrixMultiply(Cf, orgYUV, D);
-
matrixTransform(Cf);
-
matrixMultiply(D, Cf, W);
-
-
//得到的W即为核
-
cout <<
"Matrix Core(W) is" << endl;
-
matrixShow(W);
-
-
//利用核W来得到QDCT(Z)
-
quantizeDCT(W);
-
cout <<
"Matrix QDCT(Z) is" << endl;
-
matrixShow(Z);
-
-
//利用QDCT(Z)得到解码端的DCT(Wi).(Wi与编码端DCT必然不同)
-
reverseQuantize(Z);
-
cout <<
"Matrix W'(解码端DCT) is" << endl;
-
matrixShow(Wi);
-
-
//利用Wi得到解码的残差矩阵Xi
-
matrixMultiply(Ci, Wi, Di);
-
matrixTransform(Ci);
-
matrixMultiply(Di, Ci, Xi);
-
int i,j;
-
for(i =
0;i <
4; i++)
-
{
-
for(j =
0; j <
4; j++)
-
{
-
Xi[i][j] =
int( Xi[i][j] /
256.0 +
0.5 );
-
}
-
}
-
cout <<
"Matrix Xi(解码端残差) is" << endl;
-
matrixShow(Xi);
-
-
return
0;
-
}
结果为:(可以看出,结果中的QDCT和解码端的DCT都与之前的数据吻合,从而证明上面程序的实现是正确的.)
Residual matrix is
*****************************
-85 88 126 121
-79 70 65 83
-80 66 49 43
-82 86 97 41
*****************************
Matrix Core(W) is
*****************************
609 -1255 -685 -560
277 -476 113 -73
175 -159 -119 98
-14 -13 4 1
*****************************
Matrix QDCT(Z) is
*****************************
9 -12 -11 -5
3 -3 1 0
3 -1 -2 1
0 0 0 0
*****************************
Matrix W’(解码端DCT) is
*****************************
2304 -3840 -2816 -1600
960 -1200 320 0
768 -320 -512 320
0 0 0 0
*****************************
Matrix Xi(解码端残差) is
*****************************
-77 88 132 110
-80 63 67 77
-82 62 48 39
-79 87 93 32
*****************************
下面,简要看看整数DCT变换的原理.(关于整数DCT变换公式的推倒,请参考余兆明的《图像编码标准H.264技术》).
DCT变换为: DCT = A * X * At
量化为: QDCT = floor(DCT/Qstep)
反量化为: DCT‘ = QDCT * Qstep
反DCT为: X’ = At * DCT‘ * A
但在H.264中采用的是整数DCT, 在JM8.6中的实现方式也很有讲究:( .* 表示矩阵对应元素相乘)
整数DCT变化为: DCT = (Cf * X *Cft) .* Ef
量化为: QDCT = floor(DCT/Qstep)
反量化为: DCT‘ = QDCT * Qstep
整数反DCT变换为: X’ = Ci * (DCT‘ .* Ei) *Ci
在实现的时候,经常不直接得到DCT系数,而是将变换和量化结合在一起实现. 整数DCT变换有很多好处:没有除法,没有浮点数,所以高效且准确,而且减少了矩阵运算.
最后感慨一下:如果上面的程序用matlab来仿真,就简单多了,matlab太适合处理矩阵问题了.