x265代码阅读：cudata.cpp代码阅读

转载至：https://blog.csdn.net/qq_22987913/article/details/52142597

感谢原作者的详细分析，这里再次表示感谢

最近在阅读x265代码，发现cudata.cpp被调用的地方很多，索性完整的阅读一遍，在这里做些笔记，方便以后回顾。如果有错误，请网友指正。注：阅读的是2.0版x265，注2：看代码之前需要先阅读HEVC方面的参考书，我读的是Springer出版社的《High Efficiency Video Coding (HEVC) Algorithms and Architectures.pdf》

1.cudata的初始化

跟踪代码我找到了两个地方创建并初始化cudata，一个是FrameData::create里面：

[cpp]view plain copy
CUDataMemPool  m_cuMemPool;  
CUData*        m_picCTU;  
  
m_picCTU = new CUData[sps.numCUsInFrame];//sps.numCUsInFrame= (1280/64)*(720/64)= 20*11.25 => 20*12,创建240个CTU，刚好覆盖整幅图片  
m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);//param.internalCsp = X265_CSP_I420,创建一大块内存空间  
for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)  
    m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param.internalCsp, ctuAddr);  

还有一个是Analysis::create里面：

[cpp]view plain copy
    struct Mode  
      {  
          CUData     cu;  
            ...  
      };  
      // Working set kept for one CU depth: candidate modes, the chosen mode,
      // a source YUV buffer and the memory pool backing the candidates' CUData.
      struct ModeDepth  
    {  
        Mode           pred[MAX_PRED_TYPES];// per the article, MAX_PRED_TYPES is 14, i.e. 14 prediction modes  
        Mode*          bestMode;  
        Yuv            fencYuv;  
        CUDataMemPool  cuMemPool;  
    };  
      for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, cuSize >>= 1)//g_maxCUDepth = 4表示四叉树最大深度  
    {  
        ModeDepth &md = m_modeDepth[depth];  
        md.cuMemPool.create(depth, csp, MAX_PRED_TYPES);  
  
        for (int j = 0; j < MAX_PRED_TYPES; j++)  
        {  
            md.pred[j].cu.initialize(md.cuMemPool, depth, csp, j);  
        }  
    }  

接下来再看cudata结构体：

[cpp]view plain copy
// Excerpt of the x265 CUData class: data members only, member functions are
// omitted from this listing.
class CUData  
{  
public:  
    // Article note: both statics are initialized only once because they depend solely on the
    // maximum CU size. With a 64x64 maximum, s_numPartInCUSize is always 16 and s_partSet is:
    // s_partSet[0] = bcast256; s_partSet[1] = bcast64; s_partSet[2] = bcast16; s_partSet[3] = bcast4; s_partSet[4] = bcast1;
    static cubcast_t s_partSet[NUM_FULL_DEPTH]; // pointer to broadcast set functions per absolute depth  
    static uint32_t  s_numPartInCUSize;  
  
    FrameData*    m_encData;  
    const Slice*  m_slice;  
  
    cucopy_t      m_partCopy;         // pointer to function that copies m_numPartitions elements  
    cubcast_t     m_partSet;          // pointer to function that sets m_numPartitions elements  
    cucopy_t      m_subPartCopy;      // pointer to function that copies m_numPartitions/4 elements, may be NULL  
    cubcast_t     m_subPartSet;       // pointer to function that sets m_numPartitions/4 elements, may be NULL  
  
    uint32_t      m_cuAddr;           // address of the CTU containing this CU, counted in 64x64 blocks in raster order;
                                      // for a 1280x720 picture with 64x64 CTUs the range is 0~239 ((1280/64)*(720/64) = 20*11.25 => 20*12 = 240)
    uint32_t      m_absIdxInCTU;      // position of this CU inside its CTU, counted in 4x4 blocks in Z-scan order;
                                      // a 64x64 CTU holds 256 4x4 blocks, so the range is 0~255
    uint32_t      m_cuPelX;           // X of the CU's top-left corner in the picture; article: 0~1280 in steps of 4 (smallest unit is 4x4)
    uint32_t      m_cuPelY;           // Y of the CU's top-left corner in the picture; article: 0~720 in steps of 4
    uint32_t      m_numPartitions;    // number of 4x4 blocks in this CU
  
    uint32_t      m_chromaFormat;     // color space: X265_CSP_I400, X265_CSP_I420, X265_CSP_I422 or X265_CSP_I444
    uint32_t      m_hChromaShift;  
    uint32_t      m_vChromaShift;  
  
    /* Per-part data, stored contiguously */  
    int8_t*       m_qp;               // array of QP values  
    uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems redundant to depth  
    uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)  
    uint8_t*      m_tqBypass;         // array of CU lossless flags  
    int8_t*       m_refIdx[2];        // array of motion reference indices per list  
    uint8_t*      m_cuDepth;          // array of depths  
    uint8_t*      m_predMode;         // array of prediction modes  
    uint8_t*      m_partSize;         // array of partition sizes  
    uint8_t*      m_mergeFlag;        // array of merge flags  
    uint8_t*      m_interDir;         // array of inter directions  
    uint8_t*      m_mvpIdx[2];        // array of motion vector predictor candidates or merge candidate indices [0]  
    uint8_t*      m_tuDepth;          // array of transform indices  
    uint8_t*      m_transformSkip[3]; // array of transform skipping flags per plane  
    uint8_t*      m_cbf[3];           // array of coded block flags (CBF) per plane  
    uint8_t*      m_chromaIntraDir;   // array of intra directions (chroma)  
    enum { BytesPerPartition = 21 };  // combined sizeof() of all per-part data  
  
    coeff_t*      m_trCoeff[3];       // transformed coefficient buffer per plane  
  
    MV*           m_mv[2];            // array of motion vectors per list  
    MV*           m_mvd[2];           // array of coded motion vector deltas per list  
    enum { TMVP_UNIT_MASK = 0xF0 };  // mask for mapping an index into a compressed (reference) MV field
  
    const CUData* m_cuAboveLeft;      // pointer to above-left neighbor CTU  
    const CUData* m_cuAboveRight;     // pointer to above-right neighbor CTU  
    const CUData* m_cuAbove;          // pointer to above neighbor CTU  
    const CUData* m_cuLeft;           // pointer to left neighbor CTU  
    // member functions follow in the real header; omitted from this listing
};  

cudata.cpp里面的初始化代码：

[cpp]view plain copy
//参数：datapool为一大块内存。  
//      depth如果为0，则表示64x64的块，为1则是32x32，2则是16x16，3则是8x8  
//      csp表示颜色模式，可以为X265_CSP_I400,X265_CSP_I420,X265_CSP_I422,X265_CSP_I444，我这里默认用420  
//      instance为了帮助在datapool里面定位。  
void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance)  

代码就不贴出来了，从初始化可以看出，cudata里面的21个数组的长度都是m_numPartitions个字节。（m_numPartitions指的是CU里4x4块的数量。如64x64的CTU，m_numPartitions=256）。
这里再说明一下cudata里的几个成员函数指针：

m_partSet设置上面数组的值，numPartitions个。如：m_partSet((uint8_t*)m_qp, (uint8_t)qp);就可以将数组m_qp都设为qp了（m_numPartitions个）。

m_partCopy拷贝数组的值，numPartitions个。如：

[cpp]view plain copy
//这里的ctu是由FrameData::create创建的，跟当前cu创建的地方可能不同,m_encData,m_cuAddr和m_absIdxInCTU是当前cu的成员变量  
CUData& ctu = *m_encData->getPicCTU(m_cuAddr);   
m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);//将该CU的m_qp数组拷贝到FrameData里面对应的CTU的对应CU的数组m_qp里  

m_subPartSet和m_subPartCopy跟上面的相似，如：

[cpp]view plain copy
//对于64x64的块，childGeom.numPartitions为256/4 = 64；对于32x32块，为16；对于16x16的块，为4  
uint32_t offset = childGeom.numPartitions * subPartIdx;//subPartIdx < 4  
m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);//将subCU里的数组拷贝到当前CU的m_qp的对应位置，subCU的大小为当前CU的1/4  

另外还有两个初始化函数void CUData::initCTU和void CUData::initSubCU，里面有个地方比较难懂：

[cpp]view plain copy
/* initialize the remaining CU data in one memset */  
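//BytesPerPartition为21，由上面的initialize函数就可以看出，从m_cuDepth开始（含m_cuDepth）共15个指针所指的地址是连续的。  
//所以非I400格式时(BytesPerPartition - 7 = 14)，这里将从m_cuDepth开始的连续14个数组都设为了0，对于64x64的CTU(m_numPartitions = 256)就是14*256个0；I400格式时(BytesPerPartition - 11 = 10)则为10个数组。  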
memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);  

2.拷贝函数

[cpp]view plain copy
/* Copy the results of a sub-part (split) CU to the parent CU */  
//将subCU的所有数组拷贝到当前CU的数组的对应位置，subCU的大小只有当前cu的1/4  
//childGeom只用到了childGeom.numPartitions和childGeom.depth，分别代表subCU的大小和它在树中的深度信息。  
//subPartIdx可以取值0~3，表示subCU在父CU中的位置  
void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)  
  
/* If a sub-CU part is not present (off the edge of the picture) its depth and 
 * log2size should still be configured */  
//对于超出图像边界的cu块，其数组m_cuDepth和m_log2CUSize同样需要设置为正确值  
void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx)  
  
/* Copy completed predicted CU to CTU in picture */  
//将当前CU的所有数组拷贝到FrameData里面对应的CTU的对应CU里  
void CUData::copyToPic(uint32_t depth) const  
  
/* The reverse of copyToPic, called only by encodeResidue */  
//上面函数的逆过程  
//参数：ctu是拷贝源，cuGeom为要拷贝子块的信息（大小，坐标），csp为颜色模式，copyQp表示是否拷贝m_qp数组  
void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp)  
  
/* Only called by encodeResidue, these fields can be modified during inter/intra coding */  
//跟上面的copyToPic类似，只是拷贝的数组没有copyToPic多。注释说是这几个数组在inter/intra编码过程中会被改变  
void CUData::updatePic(uint32_t depth, int picCsp) const  

另外，cudata.h里面还定义了下面这些函数，都是根据depth决定每次填充多少：

[cpp]view plain copy
/* these functions all take depth as an absolute depth from CTU, it is used to calculate the number of parts to copy */  
void     setQPSubParts(int8_t qp, uint32_t absPartIdx, uint32_t depth)                      
         { s_partSet[depth]((uint8_t*)m_qp + absPartIdx, (uint8_t)qp); }  
void     setTUDepthSubParts(uint8_t tuDepth, uint32_t absPartIdx, uint32_t depth)           
         { s_partSet[depth](m_tuDepth + absPartIdx, tuDepth); }  
void     setLumaIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth)          
         { s_partSet[depth](m_lumaIntraDir + absPartIdx, dir); }  
void     setChromIntraDirSubParts(uint8_t dir, uint32_t absPartIdx, uint32_t depth)         
         { s_partSet[depth](m_chromaIntraDir + absPartIdx, dir); }  
void     setCbfSubParts(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t depth)   
         { s_partSet[depth](m_cbf[ttype] + absPartIdx, cbf); }  
void     setCbfPartRange(uint8_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes)   
         { memset(m_cbf[ttype] + absPartIdx, cbf, coveredPartIdxes); }  
void     setTransformSkipSubParts(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t depth)   
         { s_partSet[depth](m_transformSkip[ttype] + absPartIdx, tskip); }  
void     setTransformSkipPartRange(uint8_t tskip, TextType ttype, uint32_t absPartIdx, uint32_t coveredPartIdxes)   
         { memset(m_transformSkip[ttype] + absPartIdx, tskip, coveredPartIdxes); }  

3.获取相邻块函数

[cpp]view plain copy
//函数功能：返回CUData对象的一个子块的左边的块。  
//参数    ：curPartUnitIdx为CUData对象的一个子块的坐标。该坐标是该子块相对于该cu所属的CTU的。  
//                 以4x4块为单位。对于一个64x64的CTU来说，可以有16x16=256个4x4块。坐标编号为0~255.  
//         lPartUnitIdx和函数返回值都作为返回值：（这里为了直观说明，混淆了Z order和Raster坐标变换）（下面的16是指当CTU为64x64时，每行有16个4x4块）  
//                 当curPartUnitIdx对应的块在父CU的父CTU的最左边时，函数返回值为当前CTU左边的一个CTU,lPartUnitIdx为curPartUnitIdx + 16 - 1  
//             否则  
//                 当curPartUnitIdx对应的块在父CU最左边时，函数返回值为当前的CU的父CTU，lPartUnitIdx为curPartUnitIdx - 1  
//                 当curPartUnitIdx对应的块不在父CU最左边时，函数返回值为当前的CU，lPartUnitIdx为curPartUnitIdx - 1 - 父CU的地址  
const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const  
  
//下面这几个函数跟getPULeft差不多  
//函数功能：返回CUData对象的一个子块的正上方的块。  
const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const  
//函数功能：返回CUData对象的一个子块的左上角的块。  
const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const  
//函数功能：返回CUData对象的一个子块的右上角的块。  
const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const  
//函数功能：返回CUData对象的一个子块的左下角的块。  
const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const  

[cpp]view plain copy
//在getPUBelowLeft函数基础上多了个参数partUnitOffset，表示需要获取的块在更下面，  
//所获取的块与左下角的块相比，在其下方partUnitOffset*4个像素处，也就是中间隔了partUnitOffset个4x4块  
const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx,  uint32_t curPartUnitIdx, uint32_t partUnitOffset) const  
//跟上面类似，所获取的块与右上角相比，在其右方(partUnitOffset - 1)*4个像素处，也就是中间隔了partUnitOffset - 1个4x4块  
const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const  

另外，还有获取一个cu的子块的内部4x4块的坐标的函数。它们的参数puIdx只能取值0,1,2,3，因为一个cu一次最多只能分成四个。

[cpp]view plain copy
//获取该CU的子块partIdx的左上角和右上角的4x4块的坐标，partIdx只能取值0，1，2，3  
void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const  
  
//获取该CU的子块puIdx的左下角的4x4块的坐标  
uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const  
  
//获取该CU的子块puIdx的右下角的4x4块的坐标  
uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const  

4.获取qp函数

将一个CTU分成四等分，从上到下，从左到右编号为区间0，区间1，区间2，区间3

[cpp]view plain copy
/* Get left QpMinCu */  
//函数功能：如果该cu的子块curAbsIdxInCTU在该CU的父CTU的左半边，即区间0和2，则返回NULL，lPartUnitIdx无意义  
//          如果该cu的子块curAbsIdxInCTU在该CU的父CTU的右半边，即区间1和3，则返回FrameData里对应的CTU，lPartUnitIdx分别为  
//          21（区间1）和149（区间3），21转换为raster坐标即为7，149转换后为135，见下图。  
const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const  
  
/* Get above QpMinCu */  
//函数功能：如果该cu的子块curAbsIdxInCTU在该CU的父CTU的上半边，即区间0和1，则返回NULL,lPartUnitIdx无意义  
//          如果该cu的子块curAbsIdxInCTU在该CU的父CTU的下半边，即区间2和3，则返回FrameData里对应的CTU，lPartUnitIdx分别为  
//          42（区间2）和106（区间3），42转换为raster坐标即为112，106转换后为120，见下图。  
const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const  

[cpp]view plain copy
//本函数只被下面的函数getLastCodedQP调用。  
//函数功能:如果该cu的子块absPartIdx的前面一块(按z scan order)已经预测了，则返回前面一块的坐标。  
//      如果没有预测，则前面一块继续找前面一块，直到找到已经预测了的块，或者超出该cu范围时停止  
int CUData::getLastValidPartIdx(int absPartIdx) const  
  
//函数功能:1).先在本cu里面寻找，该子块absPartIdx前面是否有块已经预测了。  
//        如果有，则返回前面已经预测的qp。  
//      2).否则，在该ctu里面寻找已经预测了的块。找到则返回qp。  
//      3).如果还没找到，判断该ctu是否在图片的最左边，如果不在就在左边的CTU里面寻找，  
//        看对应位置是否已经预测，没找到继续向左找。  
//      4).如果找到了图片边界，就返回slice里的qp  
int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const  

[cpp]view plain copy
/* Get reference QP from left QpMinCu or latest coded QP */  
//上面四个函数只被这个函数调用，用于获取qp  
int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const  
{  
    uint32_t lPartIdx = 0, aPartIdx = 0;  
    const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);  
    const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU);  
  
    return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1;  
}  

5.运动估计相关

[cpp]view plain copy
//有点像书上的5.2.2.2 Merge Candidate List Construction  
//返回值:候选列表的长度。  
//参数:puIdx可以取值0,1,2,3，表示子块的位置。  
//     absPartIdx应该也是子块的坐标，但是函数里没怎么用。  
//     candMvField和candDir为返回值，实际上只是找到了合适的预测块后，拷贝对应值:  
//     candMvField[i].mv = cu->m_mv[picList][absPartIdx];  
//     candMvField[i].refIdx = cu->m_refIdx[picList][absPartIdx];  
//     candDir[i] = cu->m_interDir[absPartIdx];  
//代码里面有点奇怪的是isDiffMER函数的调用，我加printf测试了一下，所有的isDiffMER函数都返回true。搞不懂是为什么。  
//里面的临时变量nPSW和nPSH是子块的宽度和高度，单位是像素。xP和yP是子块的左上角坐标，单位也是像素。  
uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const  
  
/* Constructs a list of candidates for AMVP, and a larger list of motion candidates */  
//填充neighbours[]结构数组，结构数组有下面6个：  
//motion vector predictor direction used in AMVP  
//enum MVP_DIR  
//{  
//    MD_LEFT = 0,    // MVP of left block  
//    MD_ABOVE,       // MVP of above block  
//    MD_ABOVE_RIGHT, // MVP of above right block  
//    MD_BELOW_LEFT,  // MVP of below left block  
//    MD_ABOVE_LEFT,  // MVP of above left block  
//    MD_COLLOCATED   // MVP of temporal neighbour  
//};  
//调用下面的getInterNeighbourMV函数填充前五个，第六个调用下面的getCollocatedMV函数填充。  
void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const  
  
//函数功能：先获取该CU的partUnitIdx子块的dir方向的块，dir可以为上面的MD_BELOW_LEFT等6个值。  
//          然后将该块的m_mv和m_refIdx填入neighbour结构体里面。  
void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const  
  
//同位pu:即当前pu在邻近已编码图像中对应位置pu  
//函数功能:获得同位pu的m_mv。然后和当前pu的m_mv相比较，取两个m_mv的中间值。  
bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const  
  
//函数功能:将同位pu的m_mv和m_refIdx拷贝到neighbour结构体里面。  
// Cache the collocated MV.  
bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const  

6.Geoms初始化函数

单看函数可能有点抽象，在Frameencoder.cpp里面会初始化并使用Geoms，一般是这样使用的：m_cuGeoms[m_ctuGeomMap[cuAddr]]。

a).m_cuGeoms的初始化：如果视频的宽度和高度都不是64的整数倍，则会计算出4个m_cuGeoms，分别是：
完整64*64CTU(CTU在图像内)，缺右边(超出了图像右边界)，缺下边(超出了图像下边界)，同时缺右边和下边(图像右下角CTU)
比如说我现在用的是1280*720的视频，其中1280是64的倍数，720不是。所以初始化会产生两个m_cuGeoms：
m_cuGeoms[0]为完整的64*64CTU,m_cuGeoms[85]为缺下边（超出了图像下边界）(这里85是因为最小CU为8x8时，一个64x64的CTU的四叉树共有1+4+16+64=85个CU节点，每个节点对应一个CUGeom)。
它们的初始化是分别调用：void CUData::calcCTUGeoms(64,64,64,8,m_cuGeoms);和
void CUData::calcCTUGeoms(64,16,64,8,m_cuGeoms + 85);
b).cuAddr取值分别为0，1，2，...，19 ，[20，21，...，39]，中括号表示可以并行.(1280/64 = 20列)
c).如果视频尺寸为1280*720，则m_ctuGeomMap[0 ~ 219]为0，m_ctuGeomMap[220 ~ 239]为85.因为视频
有720/64 = 11.25，向上取整为12行，每行20个CTU，前11行(共220个)都是完整CTU，所以m_ctuGeomMap[0 ~ 219]都为0.

[cpp]view plain copy
/* Z-scan index of each cell of an 8x8 grid, addressed as g_depthScanIdx[y][x].
 * calcCTUGeoms() indexes it both with sub-block coordinates (sbY, sbX) and
 * with pixel offsets shifted right by 3, i.e. in units of 8 pixels. */
const uint32_t g_depthScanIdx[8][8] = {
    {  0,  1,  4,  5, 16, 17, 20, 21 },
    {  2,  3,  6,  7, 18, 19, 22, 23 },
    {  8,  9, 12, 13, 24, 25, 28, 29 },
    { 10, 11, 14, 15, 26, 27, 30, 31 },
    { 32, 33, 36, 37, 48, 49, 52, 53 },
    { 34, 35, 38, 39, 50, 51, 54, 55 },
    { 40, 41, 44, 45, 56, 57, 60, 61 },
    { 42, 43, 46, 47, 58, 59, 62, 63 }
};
/* Assigns to `bitfield` with the bits of `flag` set when value is 1 and
 * cleared when value is 0; all other bits are kept.  (~(value - 1) is all
 * ones for 1 and zero for 0.)  Arguments are evaluated more than once. */
#define CU_SET_FLAG(bitfield, flag, value) \
    (bitfield) = ((~((value) - 1)) & (flag)) | ((bitfield) & (~(flag)))
  
void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])  
{  
    // Initialize the coding blocks inside the CTB  
    for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= g_log2Size[minCUSize]; log2CUSize--)  
    {  
        uint32_t blockSize = 1 << log2CUSize;  
        uint32_t sbWidth   = 1 << (g_log2Size[maxCUSize] - log2CUSize);  
        int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize];  
  
  
        for (uint32_t sbY = 0; sbY < sbWidth; sbY++)  
        {  
            for (uint32_t sbX = 0; sbX < sbWidth; sbX++)  
            {  
                uint32_t depthIdx = g_depthScanIdx[sbY][sbX];  
                uint32_t cuIdx = rangeCUIdx + depthIdx;  
                uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2);  
                uint32_t px = sbX * blockSize;  
                uint32_t py = sbY * blockSize;  
                        //这两个Flag用于判断一个块是全部在图像外面还是部分在图像外面。  
                        //如果部分在图像外面，并且非叶子节点，则presentFlag = true，splitMandatoryFlag = true  
                        //如果全部在图像外面则，都为false  
                int32_t presentFlag = px < ctuWidth && py < ctuHeight;  
                int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight);  
                  
                /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */  
                uint32_t xOffset = (sbX * blockSize) >> 3;  
                uint32_t yOffset = (sbY * blockSize) >> 3;  
                X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n");  
  
  
                CUGeom *cu = cuDataArray + cuIdx;  
                cu->log2CUSize = log2CUSize;//该cu的大小  
                cu->childOffset = childIdx - cuIdx;//该cu的第一个子cu的索引与该cu自己的索引之差  
                cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;//该cu在CTU里面的位置，以4x4块为单位  
                //该cu可以分成多少个4x4的块。  
                cu->numPartitions = (NUM_4x4_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2));  
                cu->depth = g_log2Size[maxCUSize] - log2CUSize;//该cu块所在的深度  
                cu->geomRecurId = cuIdx;//该cu的索引  
  
  
                cu->flags = 0;  
                CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);  
                CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag);  
                CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag);  
                        //当该cu部分在图像外面，且非叶子节点时，会设置CUGeom::PRESENT，CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT  
                //当该cu完全在图像里面时，会设置CUGeom::PRESENT  
                //当该cu完全在图像外面时，会设置为0.  
            }  
        }  
        rangeCUIdx += sbWidth * sbWidth;  
    }  
}  

x265代码阅读：cudata.cpp代码阅读

猜你喜欢