Void TEncSearch::estIntraPredQT( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, UInt& ruiDistC, Bool bLumaOnly ) { UInt uiDepth = pcCU->getDepth(0); UInt uiNumPU = pcCU->getNumPartitions(); UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1; UInt uiWidth = pcCU->getWidth (0) >> uiInitTrDepth; UInt uiHeight = pcCU->getHeight(0) >> uiInitTrDepth; UInt uiQNumParts = pcCU->getTotalNumPart() >> 2; UInt uiWidthBit = pcCU->getIntraSizeIdx(0); UInt uiOverallDistY = 0; UInt uiOverallDistC = 0; UInt CandNum; Double CandCostList[ FAST_UDI_MAX_RDMODE_NUM ]; //===== set QP and clear Cbf ===== if ( pcCU->getSlice()->getPPS()->getUseDQP() == true) { pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth ); } else { pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth ); } //===== loop over partitions ===== UInt uiPartOffset = 0; for( UInt uiPU = 0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts ) { //===== init pattern for luma prediction ===== Bool bAboveAvail = false; Bool bLeftAvail = false; pcCU->getPattern()->initPattern ( pcCU, uiInitTrDepth, uiPartOffset ); pcCU->getPattern()->initAdiPattern( pcCU, uiPartOffset, uiInitTrDepth, m_piYuvExt, m_iYuvExtStride, m_iYuvExtHeight, bAboveAvail, bLeftAvail ); //===== determine set of modes to be tested (using prediction signal only) ===== Int numModesAvailable = 35; //total number of Intra modes Pel* piOrg = pcOrgYuv ->getLumaAddr( uiPU, uiWidth ); Pel* piPred = pcPredYuv->getLumaAddr( uiPU, uiWidth ); UInt uiStride = pcPredYuv->getStride(); UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM]; Int numModesForFullRD = g_aucIntraModeNumFast[ uiWidthBit ]; Bool doFastSearch = (numModesForFullRD != numModesAvailable); if (doFastSearch) { assert(numModesForFullRD < numModesAvailable); for( Int i=0; i < numModesForFullRD; i++ ) { CandCostList[ i ] = MAX_DOUBLE; } CandNum = 0; for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ ) { UInt uiMode = modeIdx; predIntraLumaAng( pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth, uiHeight, bAboveAvail, bLeftAvail ); // use hadamard transform here UInt uiSad = m_pcRdCost->calcHAD(g_bitDepthY, piOrg, uiStride, piPred, uiStride, uiWidth, uiHeight ); UInt iModeBits = xModeBitsIntra( pcCU, uiMode, uiPU, uiPartOffset, uiDepth, uiInitTrDepth ); Double cost = (Double)uiSad + (Double)iModeBits * m_pcRdCost->getSqrtLambda(); CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList ); } #if FAST_UDI_USE_MPM Int uiPreds[3] = {-1, -1, -1}; Int iMode = -1; Int numCand = pcCU->getIntraDirLumaPredictor( uiPartOffset, uiPreds, &iMode ); if( iMode >= 0 ) { numCand = iMode; } for( Int j=0; j < numCand; j++) { Bool mostProbableModeIncluded = false; Int mostProbableMode = uiPreds[j]; for( Int i=0; i < numModesForFullRD; i++) { mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]); } if (!mostProbableModeIncluded) { uiRdModeList[numModesForFullRD++] = mostProbableMode; } } #endif // FAST_UDI_USE_MPM } else { for( Int i=0; i < numModesForFullRD; i++) { uiRdModeList[i] = i; } } //===== check modes (using r-d costs) ===== #if HHI_RQT_INTRA_SPEEDUP_MOD UInt uiSecondBestMode = MAX_UINT; Double dSecondBestPUCost = MAX_DOUBLE; #endif UInt uiBestPUMode = 0; UInt uiBestPUDistY = 0; UInt uiBestPUDistC = 0; Double dBestPUCost = MAX_DOUBLE; for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ ) { // set luma prediction mode UInt uiOrgMode = uiRdModeList[uiMode]; pcCU->setLumaIntraDirSubParts ( uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth ); // set context models m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] ); // determine residual for partition UInt uiPUDistY = 0; UInt uiPUDistC = 0; Double dPUCost = 0.0; #if HHI_RQT_INTRA_SPEEDUP xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, true, dPUCost ); #else xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, dPUCost ); #endif // check r-d cost if( dPUCost < dBestPUCost ) { #if HHI_RQT_INTRA_SPEEDUP_MOD uiSecondBestMode = uiBestPUMode; dSecondBestPUCost = dBestPUCost; #endif uiBestPUMode = uiOrgMode; uiBestPUDistY = uiPUDistY; uiBestPUDistC = uiPUDistC; dBestPUCost = dPUCost; xSetIntraResultQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcRecoYuv ); UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 ); ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempCbf[0], pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempCbf[1], pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempCbf[2], pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[0], pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[1], pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[2], pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); } #if HHI_RQT_INTRA_SPEEDUP_MOD else if( dPUCost < dSecondBestPUCost ) { uiSecondBestMode = uiOrgMode; dSecondBestPUCost = dPUCost; } #endif } // Mode loop #if HHI_RQT_INTRA_SPEEDUP #if HHI_RQT_INTRA_SPEEDUP_MOD for( UInt ui =0; ui < 2; ++ui ) #endif { #if HHI_RQT_INTRA_SPEEDUP_MOD UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode; if( uiOrgMode == MAX_UINT ) { break; } #else UInt uiOrgMode = uiBestPUMode; #endif pcCU->setLumaIntraDirSubParts ( uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth ); // set context models m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] ); // determine residual for partition UInt uiPUDistY = 0; UInt uiPUDistC = 0; Double dPUCost = 0.0; xRecurIntraCodingQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcOrgYuv, pcPredYuv, pcResiYuv, uiPUDistY, uiPUDistC, false, dPUCost ); // check r-d cost if( dPUCost < dBestPUCost ) { uiBestPUMode = uiOrgMode; uiBestPUDistY = uiPUDistY; uiBestPUDistC = uiPUDistC; dBestPUCost = dPUCost; xSetIntraResultQT( pcCU, uiInitTrDepth, uiPartOffset, bLumaOnly, pcRecoYuv ); UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 ); ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempCbf[0], pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempCbf[1], pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempCbf[2], pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[0], pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[1], pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[2], pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); } } // Mode loop #endif //--- update overall distortion --- uiOverallDistY += uiBestPUDistY; uiOverallDistC += uiBestPUDistC; //--- update transform index and cbf --- UInt uiQPartNum = pcCU->getPic()->getNumPartInCU() >> ( ( pcCU->getDepth(0) + uiInitTrDepth ) << 1 ); ::memcpy( pcCU->getTransformIdx() + uiPartOffset, m_puhQTTempTrIdx, uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getCbf( TEXT_LUMA ) + uiPartOffset, m_puhQTTempCbf[0], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getCbf( TEXT_CHROMA_U ) + uiPartOffset, m_puhQTTempCbf[1], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getCbf( TEXT_CHROMA_V ) + uiPartOffset, m_puhQTTempCbf[2], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getTransformSkip(TEXT_LUMA) + uiPartOffset, m_puhQTTempTransformSkipFlag[0], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getTransformSkip(TEXT_CHROMA_U) + uiPartOffset, m_puhQTTempTransformSkipFlag[1], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getTransformSkip(TEXT_CHROMA_V) + uiPartOffset, m_puhQTTempTransformSkipFlag[2], uiQPartNum * sizeof( UChar ) ); //--- set reconstruction for next intra prediction blocks --- if( uiPU != uiNumPU - 1 ) { Bool bSkipChroma = false; Bool bChromaSame = false; UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> ( pcCU->getDepth(0) + uiInitTrDepth ) ] + 2; if( !bLumaOnly && uiLog2TrSize == 2 ) { assert( uiInitTrDepth > 0 ); bSkipChroma = ( uiPU != 0 ); bChromaSame = true; } UInt uiCompWidth = pcCU->getWidth ( 0 ) >> uiInitTrDepth; UInt uiCompHeight = pcCU->getHeight( 0 ) >> uiInitTrDepth; UInt uiZOrder = pcCU->getZorderIdxInCU() + uiPartOffset; Pel* piDes = pcCU->getPic()->getPicYuvRec()->getLumaAddr( pcCU->getAddr(), uiZOrder ); UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride(); Pel* piSrc = pcRecoYuv->getLumaAddr( uiPartOffset ); UInt uiSrcStride = pcRecoYuv->getStride(); for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride ) { for( UInt uiX = 0; uiX < uiCompWidth; uiX++ ) { piDes[ uiX ] = piSrc[ uiX ]; } } if( !bLumaOnly && !bSkipChroma ) { if( !bChromaSame ) { uiCompWidth >>= 1; uiCompHeight >>= 1; } piDes = pcCU->getPic()->getPicYuvRec()->getCbAddr( pcCU->getAddr(), uiZOrder ); uiDesStride = pcCU->getPic()->getPicYuvRec()->getCStride(); piSrc = pcRecoYuv->getCbAddr( uiPartOffset ); uiSrcStride = pcRecoYuv->getCStride(); for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride ) { for( UInt uiX = 0; uiX < uiCompWidth; uiX++ ) { piDes[ uiX ] = piSrc[ uiX ]; } } piDes = pcCU->getPic()->getPicYuvRec()->getCrAddr( pcCU->getAddr(), uiZOrder ); piSrc = pcRecoYuv->getCrAddr( uiPartOffset ); for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride ) { for( UInt uiX = 0; uiX < uiCompWidth; uiX++ ) { piDes[ uiX ] = piSrc[ uiX ]; } } } } //=== update PU data ==== pcCU->setLumaIntraDirSubParts ( uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth ); pcCU->copyToPic ( uiDepth, uiPU, uiInitTrDepth ); } // PU loop if( uiNumPU > 1 ) { // set Cbf for all blocks UInt uiCombCbfY = 0; UInt uiCombCbfU = 0; UInt uiCombCbfV = 0; UInt uiPartIdx = 0; for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts ) { uiCombCbfY |= pcCU->getCbf( uiPartIdx, TEXT_LUMA, 1 ); uiCombCbfU |= pcCU->getCbf( uiPartIdx, TEXT_CHROMA_U, 1 ); uiCombCbfV |= pcCU->getCbf( uiPartIdx, TEXT_CHROMA_V, 1 ); } for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ ) { pcCU->getCbf( TEXT_LUMA )[ uiOffs ] |= uiCombCbfY; pcCU->getCbf( TEXT_CHROMA_U )[ uiOffs ] |= uiCombCbfU; pcCU->getCbf( TEXT_CHROMA_V )[ uiOffs ] |= uiCombCbfV; } } //===== reset context models ===== m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); //===== set distortion (rate and r-d costs are determined later) ===== ruiDistC = uiOverallDistC; pcCU->getTotalDistortion() = uiOverallDistY + uiOverallDistC; }
Void TComPrediction::predIntraLumaAng(TComPattern* pcTComPattern, UInt uiDirMode, Pel* piPred, UInt uiStride, Int iWidth, Int iHeight, Bool bAbove, Bool bLeft ) { Pel *pDst = piPred; Int *ptrSrc; assert( g_aucConvertToBit[ iWidth ] >= 0 ); // 4x 4 assert( g_aucConvertToBit[ iWidth ] <= 5 ); // 128x128 assert( iWidth == iHeight ); ptrSrc = pcTComPattern->getPredictorPtr( uiDirMode, g_aucConvertToBit[ iWidth ] + 2, m_piYuvExt ); // get starting pixel in block Int sw = 2 * iWidth + 1; // Create the prediction if ( uiDirMode == PLANAR_IDX ) { xPredIntraPlanar( ptrSrc+sw+1, sw, pDst, uiStride, iWidth, iHeight ); } else { if ( (iWidth > 16) || (iHeight > 16) ) { xPredIntraAng(g_bitDepthY, ptrSrc+sw+1, sw, pDst, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, false ); } else { xPredIntraAng(g_bitDepthY, ptrSrc+sw+1, sw, pDst, uiStride, iWidth, iHeight, uiDirMode, bAbove, bLeft, true ); if( (uiDirMode == DC_IDX ) && bAbove && bLeft ) { xDCPredFiltering( ptrSrc+sw+1, sw, pDst, uiStride, iWidth, iHeight); } } } }
UInt TComRdCost::calcHAD(Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight ) { UInt uiSum = 0; Int x, y; if ( ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 ) ) { for ( y=0; y<iHeight; y+= 8 ) { for ( x=0; x<iWidth; x+= 8 ) { uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 ); } pi0 += iStride0*8; pi1 += iStride1*8; } } else { assert(iWidth % 4 == 0 && iHeight % 4 == 0); for ( y=0; y<iHeight; y+= 4 ) { for ( x=0; x<iWidth; x+= 4 ) { uiSum += xCalcHADs4x4( &pi0[x], &pi1[x], iStride0, iStride1, 1 ); } pi0 += iStride0*4; pi1 += iStride1*4; } } return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8); }
UInt TEncSearch::xModeBitsIntra( TComDataCU* pcCU, UInt uiMode, UInt uiPU, UInt uiPartOffset, UInt uiDepth, UInt uiInitTrDepth ) { // Reload only contexts required for coding intra mode information m_pcRDGoOnSbacCoder->loadIntraDirModeLuma( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] ); pcCU->setLumaIntraDirSubParts ( uiMode, uiPartOffset, uiDepth + uiInitTrDepth ); m_pcEntropyCoder->resetBits(); m_pcEntropyCoder->encodeIntraDirModeLuma ( pcCU, uiPartOffset); return m_pcEntropyCoder->getNumberOfWrittenBits(); }
UInt TEncSearch::xUpdateCandList( UInt uiMode, Double uiCost, UInt uiFastCandNum, UInt * CandModeList, Double * CandCostList ) { UInt i; UInt shift=0; while ( shift<uiFastCandNum && uiCost<CandCostList[ uiFastCandNum-1-shift ] ) shift++; if( shift!=0 ) { for(i=1; i<shift; i++) { CandModeList[ uiFastCandNum-i ] = CandModeList[ uiFastCandNum-1-i ]; CandCostList[ uiFastCandNum-i ] = CandCostList[ uiFastCandNum-1-i ]; } CandModeList[ uiFastCandNum-shift ] = uiMode; CandCostList[ uiFastCandNum-shift ] = uiCost; return 1; } return 0; }