亚像素2之1/2插值过程 xExtDIFUpSamplingH

通过前面一篇亚像素入口和过程的博客，分析了亚像素的入口和过程，这篇文章分析1/2插值的过程：

xExtDIFUpSamplingH 这个函数是进行1/2精度插值，首先对参考图像进行水平插值，整像素位置直接复制给了m_filteredBlockTmp[0]，调用了filterHor这个函数进行水平插值

调用了m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0].getAddr(COMPONENT_Y), intStride, width + 1, height + filterSize, 0, false, chFmt, pattern->getBitDepthY());进行水平整像素复制给m_filteredBlockTmp[0]
filterHor这个函数分了三种情况：frac = 0、亮度和色度。frac = 0 即整像素位置，不需要插值；在进行整像素复制的时候又调用了filterCopy这个函数：

调用了filterCopy(bitDepth, src, srcStride, dst, dstStride, width, height, true, isLast );先看isFirst == true为第一次插值：Pel val = leftShift_round(src[col], shift);dst[col] = val - (Pel)IF_INTERNAL_OFFS;

再调用m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2].getAddr(COMPONENT_Y), intStride, width + 1, height + filterSize, 2, false, chFmt, pattern->getBitDepthY());进行1/2像素水平插值赋值给m_filteredBlockTmp[2]

水平整像素插值后的Y做垂直方向整像素插值，结果存储在m_filteredBlock[0][0]中， dstPtr = m_filteredBlock[0][0].getAddr(COMPONENT_Y);
m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0, false, true, chFmt, pattern->getBitDepthY());

水平整像素插值后的Y（m_filteredBlockTmp[0]）做垂直方向1/2像素插值，结果存储在m_filteredBlock[2][0]中
   intPtr = m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + (halfFilterSize - 1) * intStride + 1;
   dstPtr = m_filteredBlock[2][0].getAddr(COMPONENT_Y);
   m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2, false, true, chFmt, pattern->getBitDepthY());

/**
 * \brief Generate half-sample interpolated block
 *
 * Two-pass separable interpolation of the luma plane:
 *  - a horizontal pass writes intermediate (not yet final) samples into
 *    m_filteredBlockTmp[h], where h is the horizontal fractional position
 *    (0 = integer, 2 = half);
 *  - a vertical pass reads those intermediates and writes the final samples
 *    into m_filteredBlock[v][h], where v is the vertical fractional position
 *    and h the horizontal one.
 *
 * \param pattern Reference picture ROI
 */
Void TEncSearch::xExtDIFUpSamplingH(TComPattern* pattern)
{
	const Int roiWidth  = pattern->getROIYWidth();
	const Int roiHeight = pattern->getROIYHeight();
	const Int refStride = pattern->getPatternLStride();
	const Int bitDepthY = pattern->getBitDepthY();

	const Int numTaps  = NTAPS_LUMA;
	const Int halfTaps = (numTaps >> 1);

	const Int tmpStride = m_filteredBlockTmp[0].getStride(COMPONENT_Y);
	const Int outStride = m_filteredBlock[0][0].getStride(COMPONENT_Y);
	const ChromaFormat chFmt = m_filteredBlock[0][0].getChromaFormat();

	// The horizontal pass starts halfTaps rows above and one column left of the
	// ROI and covers (roiWidth + 1) x (roiHeight + numTaps) samples, so the
	// vertical pass below has the extra rows/columns it reads.
	Pel *const refStart = pattern->getROIY() - halfTaps * refStride - 1;
	Pel *const tmpInt   = m_filteredBlockTmp[0].getAddr(COMPONENT_Y); // horizontal integer position
	Pel *const tmpHalf  = m_filteredBlockTmp[2].getAddr(COMPONENT_Y); // horizontal half position

	// Horizontal pass (isLast = false: results stay in the intermediate representation).
	m_if.filterHor(COMPONENT_Y, refStart, refStride, tmpInt,  tmpStride, roiWidth + 1, roiHeight + numTaps, 0, false, chFmt, bitDepthY);
	m_if.filterHor(COMPONENT_Y, refStart, refStride, tmpHalf, tmpStride, roiWidth + 1, roiHeight + numTaps, 2, false, chFmt, bitDepthY);

	// Vertical pass (isFirst = false, isLast = true).

	// Horizontal integer, vertical integer -> m_filteredBlock[0][0]
	m_if.filterVer(COMPONENT_Y, tmpInt + halfTaps * tmpStride + 1, tmpStride,
	               m_filteredBlock[0][0].getAddr(COMPONENT_Y), outStride,
	               roiWidth, roiHeight, 0, false, true, chFmt, bitDepthY);

	// Horizontal integer, vertical half -> m_filteredBlock[2][0] (one extra row)
	m_if.filterVer(COMPONENT_Y, tmpInt + (halfTaps - 1) * tmpStride + 1, tmpStride,
	               m_filteredBlock[2][0].getAddr(COMPONENT_Y), outStride,
	               roiWidth, roiHeight + 1, 2, false, true, chFmt, bitDepthY);

	// Horizontal half, vertical integer -> m_filteredBlock[0][2] (one extra column)
	m_if.filterVer(COMPONENT_Y, tmpHalf + halfTaps * tmpStride, tmpStride,
	               m_filteredBlock[0][2].getAddr(COMPONENT_Y), outStride,
	               roiWidth + 1, roiHeight, 0, false, true, chFmt, bitDepthY);

	// Horizontal half, vertical half -> m_filteredBlock[2][2] (one extra row and column)
	m_if.filterVer(COMPONENT_Y, tmpHalf + (halfTaps - 1) * tmpStride, tmpStride,
	               m_filteredBlock[2][2].getAddr(COMPONENT_Y), outStride,
	               roiWidth + 1, roiHeight + 1, 2, false, true, chFmt, bitDepthY);
}

/**
 * \brief Filter a block of Luma/Chroma samples (horizontal)
 *
 * Dispatches on the fractional offset and the component:
 *  - frac == 0: no interpolation, the block is passed to filterCopy();
 *  - luma:      NTAPS_LUMA-tap filter using m_lumaFilter[frac];
 *  - otherwise: NTAPS_CHROMA-tap filter using m_chromaFilter, with frac
 *               rescaled by the component's horizontal scale.
 *
 * \param  compID     Component ID
 * \param  src        Pointer to source samples
 * \param  srcStride  Stride of source samples
 * \param  dst        Pointer to destination samples
 * \param  dstStride  Stride of destination samples
 * \param  width      Width of block
 * \param  height     Height of block
 * \param  frac       Fractional sample offset
 * \param  isLast     Flag indicating whether it is the last filtering operation
 * \param  fmt        Chroma format
 * \param  bitDepth   Bit depth
 */
Void TComInterpolationFilter::filterHor(const ComponentID compID, Pel *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, Int frac, Bool isLast, const ChromaFormat fmt, const Int bitDepth )
{
  // Integer sample position: nothing to interpolate. The horizontal pass is
  // always treated as the first filtering stage (isFirst = true).
  if ( frac == 0 )
  {
    filterCopy(bitDepth, src, srcStride, dst, dstStride, width, height, true, isLast );
    return;
  }

  // Chroma: the coefficient table index is frac scaled by the horizontal
  // subsampling of this component.
  if ( !isLuma(compID) )
  {
    const UInt csx = getComponentScaleX(compID, fmt);
    assert(frac >=0 && csx<2 && (frac<<(1-csx)) < CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS);
    filterHor<NTAPS_CHROMA>(bitDepth, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac<<(1-csx)]);
    return;
  }

  // Luma fractional position.
  assert(frac >= 0 && frac < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS);
  filterHor<NTAPS_LUMA>(bitDepth, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac]);
}

/**
 * \brief Apply unit FIR filter to a block of samples
 *
 * Copies a width x height block, converting between sample representations
 * according to the stage flags:
 *  - isFirst == isLast: plain copy;
 *  - first but not last: left-shift by the headroom and subtract
 *    IF_INTERNAL_OFFS (input samples -> intermediate representation);
 *  - last but not first: add IF_INTERNAL_OFFS, right-shift with rounding by
 *    the headroom and clip to [0, (1 << bitDepth) - 1]
 *    (intermediate representation -> output samples).
 *
 * \param bitDepth   bitDepth of samples
 * \param src        Pointer to source samples
 * \param srcStride  Stride of source samples
 * \param dst        Pointer to destination samples
 * \param dstStride  Stride of destination samples
 * \param width      Width of block
 * \param height     Height of block
 * \param isFirst    Flag indicating whether it is the first filtering operation
 * \param isLast     Flag indicating whether it is the last filtering operation
 */
Void TComInterpolationFilter::filterCopy(Int bitDepth, const Pel *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, Bool isFirst, Bool isLast)
{
  // Same stage flags on both ends: no representation change, straight copy.
  if ( isFirst == isLast )
  {
    for (Int y = 0; y < height; y++, src += srcStride, dst += dstStride)
    {
      for (Int x = 0; x < width; x++)
      {
        dst[x] = src[x];
      }
    }
    return;
  }

  // Both remaining cases use the same headroom shift.
  const Int shift = std::max<Int>(2, (IF_INTERNAL_PREC - bitDepth));

  // First stage only: scale up into the intermediate representation.
  if ( isFirst )
  {
    for (Int y = 0; y < height; y++, src += srcStride, dst += dstStride)
    {
      for (Int x = 0; x < width; x++)
      {
        const Pel scaled = leftShift_round(src[x], shift);
        dst[x] = scaled - (Pel)IF_INTERNAL_OFFS;
      }
    }
    return;
  }

  // Last stage only: scale back down, round and clip to the sample range.
  const Pel minVal = 0;
  const Pel maxVal = (1 << bitDepth) - 1;

  for (Int y = 0; y < height; y++, src += srcStride, dst += dstStride)
  {
    for (Int x = 0; x < width; x++)
    {
      Pel val = src[x];
      val = rightShift_round((val + IF_INTERNAL_OFFS), shift);
      if (val < minVal)
      {
        val = minVal;
      }
      if (val > maxVal)
      {
        val = maxVal;
      }
      dst[x] = val;
    }
  }
}

/**
 * \brief Filter a block of samples (horizontal)
 *
 * Turns the run-time isLast flag into the compile-time isLast template
 * argument of filter(); the horizontal pass is always instantiated with
 * isVertical = false and isFirst = true.
 *
 * \tparam N          Number of taps
 * \param  bitDepth   Bit depth of samples
 * \param  src        Pointer to source samples
 * \param  srcStride  Stride of source samples
 * \param  dst        Pointer to destination samples
 * \param  dstStride  Stride of destination samples
 * \param  width      Width of block
 * \param  height     Height of block
 * \param  isLast     Flag indicating whether it is the last filtering operation
 * \param  coeff      Pointer to filter taps
 */
template<Int N>
Void TComInterpolationFilter::filterHor(Int bitDepth, Pel *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, Bool isLast, TFilterCoeff const *coeff)
{
  // Another filtering stage follows: keep the intermediate representation.
  if ( !isLast )
  {
    filter<N, false, true, false>(bitDepth, src, srcStride, dst, dstStride, width, height, coeff);
    return;
  }

  // Single-stage filtering: this pass is both the first and the last one.
  filter<N, false, true, true>(bitDepth, src, srcStride, dst, dstStride, width, height, coeff);
}

/**
 * \brief Apply FIR filter to a block of samples
 *
 * For every output sample, N source samples spaced by the tap stride
 * (srcStride when isVertical, 1 otherwise) are multiplied by the N
 * coefficients and summed; the sum is then offset and right-shifted.
 * The shift/offset depend on the stage flags:
 *  - isLast:  a rounding offset is added and the result is clipped to
 *             [0, (1 << bitDepth) - 1]; when not isFirst the shift also
 *             includes the headroom and IF_INTERNAL_OFFS is added back;
 *  - !isLast: no clipping; when isFirst the shift is reduced by the headroom
 *             and IF_INTERNAL_OFFS is subtracted.
 *
 * \tparam N          Number of taps
 * \tparam isVertical Flag indicating filtering along vertical direction
 * \tparam isFirst    Flag indicating whether it is the first filtering operation
 * \tparam isLast     Flag indicating whether it is the last filtering operation
 * \param  bitDepth   Bit depth of samples
 * \param  src        Pointer to source samples
 * \param  srcStride  Stride of source samples
 * \param  dst        Pointer to destination samples
 * \param  dstStride  Stride of destination samples
 * \param  width      Width of block
 * \param  height     Height of block
 * \param  coeff      Pointer to filter taps
 */
 // Interpolation filter
template<Int N, Bool isVertical, Bool isFirst, Bool isLast>
Void TComInterpolationFilter::filter(Int bitDepth, Pel const *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, TFilterCoeff const *coeff)
{
  Int row, col;

  Pel c[8];// local copy of the filter coefficients; only the first N entries are set
  c[0] = coeff[0];// tap 0
  c[1] = coeff[1];// tap 1
  if ( N >= 4 ) // taps 2 and 3 exist only when N >= 4
  {
    c[2] = coeff[2];
    c[3] = coeff[3];
  }
  if ( N >= 6 )   // taps 4 and 5 exist only when N >= 6
  {
    c[4] = coeff[4];
    c[5] = coeff[5];
  }
  if ( N == 8 )// taps 6 and 7 exist only when N == 8
  {
    c[6] = coeff[6];
    c[7] = coeff[7];
  }

  Int cStride = ( isVertical ) ? srcStride : 1; // distance between taps: srcStride when vertical, 1 when horizontal
  src -= ( N/2 - 1 ) * cStride;// step back N/2 - 1 taps so src addresses the first tap

  Int offset;
  Pel maxVal;
  Int headRoom = std::max<Int>(2, (IF_INTERNAL_PREC - bitDepth));
  Int shift    = IF_FILTER_PREC;
  // with the current settings (IF_INTERNAL_PREC = 14 and IF_FILTER_PREC = 6), though headroom can be
  // negative for bit depths greater than 14, shift will remain non-negative for bit depths of 8->20
  assert(shift >= 0);

  if ( isLast )
  {
    // Final stage: round, undo the intermediate scaling/offset when this is
    // not also the first stage, and remember the clipping bound.
    shift += (isFirst) ? 0 : headRoom;
    offset = 1 << (shift - 1);
    offset += (isFirst) ? 0 : IF_INTERNAL_OFFS << IF_FILTER_PREC;
    maxVal = (1 << bitDepth) - 1;
  }
  else
  {
    // Intermediate output: no rounding offset and no clipping (maxVal unused).
    shift -= (isFirst) ? headRoom : 0;
    offset = (isFirst) ? -IF_INTERNAL_OFFS << shift : 0;
    maxVal = 0;
  }

  // Optional SIMD paths, compiled only when vector coding is enabled and
  // high-bit-depth support is off. Each branch is selected by tap count and
  // width alignment, handles 8 or 4 output samples per inner iteration, and
  // returns when done; anything not matched falls through to the scalar loop.
  // NOTE(review): the simdInterpolate*/simdClip3 helpers are defined outside
  // this excerpt; presumably they compute the same result as the scalar loop.
#if VECTOR_CODING__INTERPOLATION_FILTER && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
  if( bitDepth <= 10 )
  {
    if( N == 8 && !( width & 0x07 ) )
    {
      // 8 taps, width a multiple of 8: 8 samples per iteration.
      Short minVal = 0;
      __m128i mmOffset = _mm_set1_epi32( offset );
      __m128i mmCoeff[8];
      __m128i mmMin = _mm_set1_epi16( minVal );
      __m128i mmMax = _mm_set1_epi16( maxVal );
      for( Int n = 0 ; n < 8 ; n++ )
        mmCoeff[n] = _mm_set1_epi16( c[n] );
      for( row = 0 ; row < height ; row++ )
      {
        for( col = 0 ; col < width ; col += 8 )
        {
          __m128i mmFiltered = simdInterpolateLuma8( src + col , cStride , mmCoeff , mmOffset , shift );
          if( isLast )
          {
            mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
          }
          _mm_storeu_si128( ( __m128i * )( dst + col ) , mmFiltered );
        }
        src += srcStride;
        dst += dstStride;
      }
      return;
    }
    else if( N == 8 && !( width & 0x03 ) )
    {
      // 8 taps, width a multiple of 4: 4 samples per iteration.
      Short minVal = 0;
      __m128i mmOffset = _mm_set1_epi32( offset );
      __m128i mmCoeff[8];
      __m128i mmMin = _mm_set1_epi16( minVal );
      __m128i mmMax = _mm_set1_epi16( maxVal );
      for( Int n = 0 ; n < 8 ; n++ )
        mmCoeff[n] = _mm_set1_epi16( c[n] );
      for( row = 0 ; row < height ; row++ )
      {
        for( col = 0 ; col < width ; col += 4 )
        {
          __m128i mmFiltered = simdInterpolateLuma4( src + col , cStride , mmCoeff , mmOffset , shift );
          if( isLast )
          {
            mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
          }
          _mm_storel_epi64( ( __m128i * )( dst + col ) , mmFiltered );
        }
        src += srcStride;
        dst += dstStride;
      }
      return;
    }
    else if( N == 4 && !( width & 0x03 ) )
    {
      // 4 taps, width a multiple of 4: 4 samples per iteration.
      Short minVal = 0;
      __m128i mmOffset = _mm_set1_epi32( offset );
      __m128i mmCoeff[8];
      __m128i mmMin = _mm_set1_epi16( minVal );
      __m128i mmMax = _mm_set1_epi16( maxVal );
      for( Int n = 0 ; n < 4 ; n++ )
        mmCoeff[n] = _mm_set1_epi16( c[n] );
      for( row = 0 ; row < height ; row++ )
      {
        for( col = 0 ; col < width ; col += 4 )
        {
          __m128i mmFiltered = simdInterpolateChroma4( src + col , cStride , mmCoeff , mmOffset , shift );
          if( isLast )
          {
            mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
          }
          _mm_storel_epi64( ( __m128i * )( dst + col ) , mmFiltered );
        }
        src += srcStride;
        dst += dstStride;
      }
      return;
    }
    else if( N == 2 && !( width & 0x07 ) )
    {
      // 2 taps, width a multiple of 8: 8 samples per iteration.
      Short minVal = 0;
      __m128i mmOffset = _mm_set1_epi32( offset );
      __m128i mmCoeff[2];
      __m128i mmMin = _mm_set1_epi16( minVal );
      __m128i mmMax = _mm_set1_epi16( maxVal );
      for( Int n = 0 ; n < 2 ; n++ )
        mmCoeff[n] = _mm_set1_epi16( c[n] );
      for( row = 0 ; row < height ; row++ )
      {
        for( col = 0 ; col < width ; col += 8 )
        {
          __m128i mmFiltered = simdInterpolateLuma2P8( src + col , cStride , mmCoeff , mmOffset , shift );
          if( isLast )
          {
            mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
          }
          _mm_storeu_si128( ( __m128i * )( dst + col ) , mmFiltered );
        }
        src += srcStride;
        dst += dstStride;
      }
      return;
    }
    else if( N == 2 && !( width & 0x03 ) )
    {
      // 2 taps, width a multiple of 4: 4 samples per iteration.
      Short minVal = 0;
      __m128i mmOffset = _mm_set1_epi32( offset );
      __m128i mmCoeff[8];
      __m128i mmMin = _mm_set1_epi16( minVal );
      __m128i mmMax = _mm_set1_epi16( maxVal );
      for( Int n = 0 ; n < 2 ; n++ )
        mmCoeff[n] = _mm_set1_epi16( c[n] );
      for( row = 0 ; row < height ; row++ )
      {
        for( col = 0 ; col < width ; col += 4 )
        {
          __m128i mmFiltered = simdInterpolateLuma2P4( src + col , cStride , mmCoeff , mmOffset , shift );
          if( isLast )
          {
            mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
          }
          _mm_storel_epi64( ( __m128i * )( dst + col ) , mmFiltered );
        }
        src += srcStride;
        dst += dstStride;
      }
      return;
    }
  }
#endif

  // Scalar reference path.
  for (row = 0; row < height; row++)
  {
    for (col = 0; col < width; col++)
    {
      Int sum;
      // Starting at the first tap (N/2 - 1 positions before the current
      // sample), multiply N consecutive taps by their coefficients and sum.
      sum  = src[ col + 0 * cStride] * c[0];
      sum += src[ col + 1 * cStride] * c[1];
      if ( N >= 4 )
      {
        sum += src[ col + 2 * cStride] * c[2];
        sum += src[ col + 3 * cStride] * c[3];
      }
      if ( N >= 6 )
      {
        sum += src[ col + 4 * cStride] * c[4];
        sum += src[ col + 5 * cStride] * c[5];
      }
      if ( N == 8 )
      {
        sum += src[ col + 6 * cStride] * c[6];
        sum += src[ col + 7 * cStride] * c[7];
      }

      Pel val = ( sum + offset ) >> shift;
      if ( isLast )
      {
        val = ( val < 0 ) ? 0 : val;// clip from below at 0
        val = ( val > maxVal ) ? maxVal : val; // clip from above at the maximum sample value
      }
      dst[col] = val; // store the filtered sample
    }

    src += srcStride;
    dst += dstStride;
  }
}

亚像素2之1/2插值过程 xExtDIFUpSamplingH

猜你喜欢