



    classifier.train( cascadeDirName,//分类器存放地址
                      numPos, numNeg,//numpos表示每一个训练阶段正样本数量,numNeg每一个训练阶段负样本数
                      precalcValBufSize, precalcIdxBufSize,
                      acceptanceRatioBreakValue );
    return 0;


bool CvCascadeClassifier::train( const string _cascadeDirName,//级联分类器训练
                                const string _posFilename,
                                const string _negFilename,
                                int _numPos, int _numNeg,
                                int _precalcValBufSize, int _precalcIdxBufSize,
                                int _numStages,
                                const CvCascadeParams& _cascadeParams,
                                const CvFeatureParams& _featureParams,//选择了Harr特征
                                const CvCascadeBoostParams& _stageParams,
                                bool baseFormatSave,
                                double acceptanceRatioBreakValue )
    // Start recording clock ticks for training time output
    const clock_t begin_time = clock();

    if( _cascadeDirName.empty() || _posFilename.empty() || _negFilename.empty() )
        CV_Error( CV_StsBadArg, "_cascadeDirName or _bgfileName or _vecFileName is NULL" );

    string dirName;
    if (_cascadeDirName.find_last_of("/\\") == (_cascadeDirName.length() - 1) )
        dirName = _cascadeDirName;
        dirName = _cascadeDirName + '/';

    numPos = _numPos;
    numNeg = _numNeg;
    numStages = _numStages;
    if ( !imgReader.create( _posFilename, _negFilename, _cascadeParams.winSize ) )
        cout << "Image reader can not be created from -vec " << _posFilename
                << " and -bg " << _negFilename << "." << endl;
        return false;
    if ( !load( dirName ) )
        cascadeParams = _cascadeParams;
        featureParams = CvFeatureParams::create(cascadeParams.featureType);
        stageParams = makePtr<CvCascadeBoostParams>();
        *stageParams = _stageParams;
        featureEvaluator = CvFeatureEvaluator::create(cascadeParams.featureType);
        featureEvaluator->init( featureParams, numPos + numNeg, cascadeParams.winSize );
        stageClassifiers.reserve( numStages );
        // Make sure that if model parameters are preloaded, that people are aware of this,
        // even when passing other parameters to the training command
        cout << "---------------------------------------------------------------------------------" << endl;
        cout << "Training parameters are pre-loaded from the parameter file in data folder!" << endl;
        cout << "Please empty this folder if you want to use a NEW set of training parameters." << endl;
        cout << "---------------------------------------------------------------------------------" << endl;
    cout << "PARAMETERS:" << endl;
    cout << "cascadeDirName: " << _cascadeDirName << endl;
    cout << "vecFileName: " << _posFilename << endl;
    cout << "bgFileName: " << _negFilename << endl;
    cout << "numPos: " << _numPos << endl;
    cout << "numNeg: " << _numNeg << endl;
    cout << "numStages: " << numStages << endl;
    cout << "precalcValBufSize[Mb] : " << _precalcValBufSize << endl;
    cout << "precalcIdxBufSize[Mb] : " << _precalcIdxBufSize << endl;
    cout << "acceptanceRatioBreakValue : " << acceptanceRatioBreakValue << endl;

    int startNumStages = (int)stageClassifiers.size();
    if ( startNumStages > 1 )
        cout << endl << "Stages 0-" << startNumStages-1 << " are loaded" << endl;
    else if ( startNumStages == 1)
        cout << endl << "Stage 0 is loaded" << endl;
    double requiredLeafFARate = pow( (double) stageParams->maxFalseAlarm, (double) numStages ) /
    double tempLeafFARate;

    for( int i = startNumStages; i < numStages; i++ )//进行每一阶段的分类器训练
        cout << endl << "===== TRAINING " << i << "-stage =====" << endl;
        cout << "<BEGIN" << endl;

        if ( !updateTrainingSet( requiredLeafFARate, tempLeafFARate ) )//
			//从正负样本集合里挑选出numPos+numNeg个样本到集合CvCascadeImageReader imgReader中,
			//样本数不够, 退出训练
            cout << "Train dataset for temp stage can not be filled. "
                    "Branch training terminated." << endl;//训练停止条件
        if( tempLeafFARate <= requiredLeafFARate )
            cout << "Required leaf false alarm rate achieved. "
                    "Branch training terminated." << endl;
        if( (tempLeafFARate <= acceptanceRatioBreakValue) && (acceptanceRatioBreakValue >= 0) ){
            cout << "The required acceptanceRatio for the model has been reached to avoid overfitting of trainingdata. "
                    "Branch training terminated." << endl;

        Ptr<CvCascadeBoost> tempStage = makePtr<CvCascadeBoost>();
        bool isStageTrained = tempStage->train( featureEvaluator,
                                                curNumSamples, _precalcValBufSize, _precalcIdxBufSize,
                                                *stageParams );//训练一个强分类器
        cout << "END>" << endl;


        stageClassifiers.push_back( tempStage );//把训练好的强分类器加入到容器中

        // save params
        if( i == 0)
            std::string paramsFilename = dirName + CC_PARAMS_FILENAME;
            FileStorage fs( paramsFilename, FileStorage::WRITE);
            if ( !fs.isOpened() )
                cout << "Parameters can not be written, because file " << paramsFilename
                        << " can not be opened." << endl;
                return false;
            fs << FileStorage::getDefaultObjectName(paramsFilename) << "{";
            writeParams( fs );
            fs << "}";
        // save current stage
        char buf[10];
        sprintf(buf, "%s%d", "stage", i );
        string stageFilename = dirName + buf + ".xml";
        FileStorage fs( stageFilename, FileStorage::WRITE );
        if ( !fs.isOpened() )
            cout << "Current stage can not be written, because file " << stageFilename
                    << " can not be opened." << endl;
            return false;
        fs << FileStorage::getDefaultObjectName(stageFilename) << "{";
        tempStage->write( fs, Mat() );
        fs << "}";

        // Output training time up till now
        float seconds = float( clock () - begin_time ) / CLOCKS_PER_SEC;
        int days = int(seconds) / 60 / 60 / 24;
        int hours = (int(seconds) / 60 / 60) % 24;
        int minutes = (int(seconds) / 60) % 60;
        int seconds_left = int(seconds) % 60;
        cout << "Training until now has taken " << days << " days " << hours << " hours " << minutes << " minutes " << seconds_left <<" seconds." << endl;

    if(stageClassifiers.size() == 0)
        cout << "Cascade classifier can't be trained. Check the used training parameters." << endl;
        return false;

    save( dirName + CC_CASCADE_FILENAME, baseFormatSave );

    return true;

进入bool isStageTrained = tempStage->train()//训练强分类器

bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
                           int _numSamples,
                           int _precalcValBufSize, int _precalcIdxBufSize,
                           const CvCascadeBoostParams& _params )
    bool isTrained = false;
    CV_Assert( !data );
    data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
                                        _precalcValBufSize, _precalcIdxBufSize, _params );//setdata(),目前暂且知道这里是调用preCalculate计算所有的特征值
    CvMemStorage *storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    storage = 0;

    set_params( _params );
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )

    update_weights( 0 );//权重初始化

    cout << "+----+---------+---------+" << endl;
    cout << "|  N |    HR   |    FA   |" << endl;
    cout << "+----+---------+---------+" << endl;

        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )//训练弱分类器,一个决策树
            delete tree;
        cvSeqPush( weak, &tree );//把弱分类器添加到强分类器里面 
        update_weights( tree );//更新权重。
        if( cvCountNonZero(subsample_mask) == 0 )
    while( !isErrDesired() && (weak->total < params.weak_count) );//训练出的弱分类器的个数小于参数值:100并且虚警率不小于0.5则训练继续。

    if(weak->total > 0)
        data->is_classifier = true;
        isTrained = true;

    return isTrained;

进入 tree->train( data, subsample_mask, this ),训练弱分类器(一棵决策树):

CvBoostTree::train( CvDTreeTrainData* _train_data,
                    const CvMat* _subsample_idx, CvBoost* _ensemble )
    ensemble = _ensemble;
    data = _train_data;
    data->shared = true;
    return do_train( _subsample_idx );//传递的是:_subsample_idx子样本索引

进入 do_train()。在这里,弱分类器是通过生成一棵决策树得到的(参见李航《统计学习方法》中有关决策树的论述)。

bool CvDTree::do_train( const CvMat* _subsample_idx )//训练弱分类器
	//1)对于每个特征 f,计算所有训练样本的特征值,并将其排序。
	//	扫描一遍排好序的特征值,对排好序的表中的每个元素,计算下面四个值:
	//	全部人脸样本的权重的和t1;
	//	全部非人脸样本的权重的和t0;
	//	在此元素之前的人脸样本的权重的和s1;
	//	在此元素之前的非人脸样本的权重的和s0;
	//	2)最终求得每个元素的分类误差
	//	在表中寻找r值最小的元素,则该元素作为最优阈值。有了该阈值,我们的第一个最优弱分类器就诞生了。
<span style="white-space:pre">	</span>//以上的弱分类器的寻找是在网上比较常见的描述但是在opencv_traincascade中,使用的决策树()
    bool result = false;

    CV_FUNCNAME( "CvDTree::do_train" );


    root = data->subsample_data( _subsample_idx );//子样本数据

    CV_CALL( try_split_node(root));//尝试分裂节点,生成树

    if( root->split )
        CV_Assert( root->left );
        CV_Assert( root->right );

        if( data->params.cv_folds > 0 )
            CV_CALL( prune_cv() );//修建生成树

        if( !data->shared )

        result = true;


    return result;

 CV_CALL( try_split_node(root));//尝试分裂节点,生成树

// Tries to split `node` and, on success, recurses into both children to grow the tree.
//
// A node is left unsplit when it has too few samples, when the maximum depth is reached,
// when it is pure (classification) or accurate enough (regression), or when
// find_best_split() returns no split.
//
// Parameters:
//   node  the node to examine; its value and risk are (re)computed first
void CvDTree::try_split_node( CvDTreeNode* node )
{
    CvDTreeSplit* best_split = 0;
    int i, n = node->sample_count, vi;
    bool can_split = true;
    double quality_scale;

    calc_node_value( node ); // fills node->value, node->node_risk and the class counts

    // Stop growing on too few samples or when the depth limit is reached.
    if( node->sample_count <= data->params.min_sample_count ||
        node->depth >= data->params.max_depth )
        can_split = false;

    if( can_split && data->is_classifier )
    {
        // check if we have a "pure" node,
        // we assume that cls_count is filled by calc_node_value()
        int* cls_count = data->counts->data.i;
        int nz = 0, m = data->get_num_classes();
        for( i = 0; i < m; i++ )
            nz += cls_count[i] != 0;
        if( nz == 1 ) // there is only one class
            can_split = false;
    }
    else if( can_split )
    {
        // Regression: stop when the node is already accurate enough.
        if( sqrt(node->node_risk)/n < data->params.regression_accuracy )
            can_split = false;
    }

    if( can_split )
    {
        best_split = find_best_split(node);
        // TODO: check the split quality ...
        node->split = best_split;
    }
    if( !can_split || !best_split )
    {
        // The node stays a leaf; its per-node training data is no longer needed.
        data->free_node_data(node);
        return;
    }

    quality_scale = calc_node_dir( node );
    if( data->params.use_surrogates )
    {
        // find all the surrogate splits
        // and sort them by their similarity to the primary one
        for( vi = 0; vi < data->var_count; vi++ )
        {
            CvDTreeSplit* split;
            int ci = data->get_var_type(vi);

            if( vi == best_split->var_idx )
                continue; // the primary split variable cannot be its own surrogate

            if( ci >= 0 )
                split = find_surrogate_split_cat( node, vi );
            else
                split = find_surrogate_split_ord( node, vi );

            if( split )
            {
                // insert the split
                CvDTreeSplit* prev_split = node->split;
                split->quality = (float)(split->quality*quality_scale);

                while( prev_split->next &&
                       prev_split->next->quality > split->quality )
                    prev_split = prev_split->next;
                split->next = prev_split->next;
                prev_split->next = split;
            }
        }
    }
    split_node_data( node );
    try_split_node( node->left );  // left child
    try_split_node( node->right ); // right child
}

calc_node_value( node );//计算节点值(就是一个决策树),根节点值,最优特征

// Computes the value and the risk of a single tree node, plus the per-fold
// cross-validation statistics when data->params.cv_folds > 0.
//
// Classification: fills data->counts with the per-class sample counts, sets
// node->class_idx / node->value to the class with the largest prior-weighted count and
// node->node_risk to the weighted count of the remaining samples.
// Regression: sets node->value to the mean response and node->node_risk to the sum of
// squared errors around that mean.
//
// Parameters:
//   node  the node to evaluate; written in place
void CvDTree::calc_node_value( CvDTreeNode* node )
{
    int i, j, k, n = node->sample_count, cv_n = data->params.cv_folds;
    int m = data->get_num_classes();

    // One scratch allocation: per-fold accumulators first (base), per-sample buffers after.
    int base_size = data->is_classifier ? m*cv_n*sizeof(int) : 2*cv_n*sizeof(double)+cv_n*sizeof(int);
    int ext_size = n*(sizeof(int) + (data->is_classifier ? sizeof(int) : sizeof(int)+sizeof(float)));
    cv::AutoBuffer<uchar> inn_buf(base_size + ext_size);
    uchar* base_buf = (uchar*)inn_buf;
    uchar* ext_buf = base_buf + base_size;

    int* cv_labels_buf = (int*)ext_buf;
    const int* cv_labels = data->get_cv_labels(node, cv_labels_buf);

    if( data->is_classifier )
    {
        // in case of classification tree:
        //  * node value is the label of the class that has the largest weight in the node.
        //  * node risk is the weighted number of misclassified samples,
        //  * j-th cross-validation fold value and risk are calculated as above,
        //    but using the samples with cv_labels(*)!=j.
        //  * j-th cross-validation fold error is calculated as the weighted number of
        //    misclassified samples with cv_labels(*)==j.

        // compute the number of instances of each class
        int* cls_count = data->counts->data.i;
        int* responses_buf = cv_labels_buf + n;
        const int* responses = data->get_class_labels(node, responses_buf);
        int* cv_cls_count = (int*)base_buf;
        double max_val = -1, total_weight = 0;
        int max_k = -1;
        double* priors = data->priors_mult->data.db;

        for( k = 0; k < m; k++ )
            cls_count[k] = 0;

        if( cv_n == 0 )
        {
            // No cross-validation: count the classes directly.
            for( i = 0; i < n; i++ )
                cls_count[responses[i]]++;
        }
        else
        {
            // Count per (fold, class) first, then sum over the folds.
            for( j = 0; j < cv_n; j++ )
                for( k = 0; k < m; k++ )
                    cv_cls_count[j*m + k] = 0;

            for( i = 0; i < n; i++ )
            {
                j = cv_labels[i]; k = responses[i];
                cv_cls_count[j*m + k]++;
            }

            for( j = 0; j < cv_n; j++ )
                for( k = 0; k < m; k++ )
                    cls_count[k] += cv_cls_count[j*m + k];
        }

        if( data->have_priors && node->parent == 0 )
        {
            // compute priors_mult from priors, take the sample ratio into account.
            double sum = 0;
            for( k = 0; k < m; k++ )
            {
                int n_k = cls_count[k];
                priors[k] = data->priors->data.db[k]*(n_k ? 1./n_k : 0.);
                sum += priors[k];
            }
            sum = 1./sum;
            for( k = 0; k < m; k++ )
                priors[k] *= sum;
        }

        // Pick the class with the largest prior-weighted count.
        for( k = 0; k < m; k++ )
        {
            double val = cls_count[k]*priors[k];
            total_weight += val;
            if( max_val < val )
            {
                max_val = val;
                max_k = k;
            }
        }

        node->class_idx = max_k;
        node->value = data->cat_map->data.i[
            data->cat_ofs->data.i[data->cat_var_count] + max_k];
        node->node_risk = total_weight - max_val;

        for( j = 0; j < cv_n; j++ )
        {
            double sum_k = 0, sum = 0, max_val_k = 0;
            max_val = -1; max_k = -1;

            for( k = 0; k < m; k++ )
            {
                double w = priors[k];
                double val_k = cv_cls_count[j*m + k]*w;  // weight inside fold j
                double val = cls_count[k]*w - val_k;     // weight outside fold j
                sum_k += val_k;
                sum += val;
                if( max_val < val )
                {
                    max_val = val;
                    max_val_k = val_k;
                    max_k = k;
                }
            }

            node->cv_Tn[j] = INT_MAX;
            node->cv_node_risk[j] = sum - max_val;
            node->cv_node_error[j] = sum_k - max_val_k;
        }
    }
    else
    {
        // in case of regression tree:
        //  * node value is 1/n*sum_i(Y_i), where Y_i is i-th response,
        //    n is the number of samples in the node.
        //  * node risk is the sum of squared errors: sum_i((Y_i - <node_value>)^2)
        //  * j-th cross-validation fold value and risk are calculated as above,
        //    but using the samples with cv_labels(*)!=j.
        //  * j-th cross-validation fold error is calculated
        //    using samples with cv_labels(*)==j as the test subset:
        //    error_j = sum_(i,cv_labels(i)==j)((Y_i - <node_value_j>)^2),
        //    where node_value_j is the node value calculated
        //    as described in the previous bullet, and summation is done
        //    over the samples with cv_labels(*)==j.

        double sum = 0, sum2 = 0;
        float* values_buf = (float*)(cv_labels_buf + n);
        int* sample_indices_buf = (int*)(values_buf + n);
        const float* values = data->get_ord_responses(node, values_buf, sample_indices_buf);
        double *cv_sum = 0, *cv_sum2 = 0;
        int* cv_count = 0;

        if( cv_n == 0 )
        {
            for( i = 0; i < n; i++ )
            {
                double t = values[i];
                sum += t;
                sum2 += t*t;
            }
        }
        else
        {
            // Per-fold sum, sum of squares and sample count live in the base buffer.
            cv_sum = (double*)base_buf;
            cv_sum2 = cv_sum + cv_n;
            cv_count = (int*)(cv_sum2 + cv_n);

            for( j = 0; j < cv_n; j++ )
            {
                cv_sum[j] = cv_sum2[j] = 0.;
                cv_count[j] = 0;
            }

            for( i = 0; i < n; i++ )
            {
                j = cv_labels[i];
                double t = values[i];
                double s = cv_sum[j] + t;
                double s2 = cv_sum2[j] + t*t;
                int nc = cv_count[j] + 1;
                cv_sum[j] = s;
                cv_sum2[j] = s2;
                cv_count[j] = nc;
            }

            for( j = 0; j < cv_n; j++ )
            {
                sum += cv_sum[j];
                sum2 += cv_sum2[j];
            }
        }

        node->node_risk = sum2 - (sum/n)*sum; // total squared error around the mean
        node->value = sum/n;

        for( j = 0; j < cv_n; j++ )
        {
            double s = cv_sum[j], si = sum - s;
            double s2 = cv_sum2[j], s2i = sum2 - s2;
            int c = cv_count[j], ci = n - c;
            double r = si/MAX(ci,1); // mean response of the samples outside fold j
            node->cv_node_risk[j] = s2i - r*r*ci;
            node->cv_node_error[j] = s2 - 2*r*s + c*r*r;
            node->cv_Tn[j] = INT_MAX;
        }
    }
}

        // Excerpt repeated from the boosting loop of CvCascadeBoost::train(), shown again
        // to introduce update_weights(). NOTE(review): this is a fragment - the loop
        // opener and the bodies of some branches are not part of the excerpt.
        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )// train one weak classifier (a decision tree)
            delete tree;
        cvSeqPush( weak, &tree );// append the weak classifier to the strong classifier
        update_weights( tree );// update the sample weights
        if( cvCountNonZero(subsample_mask) == 0 )
    while( !isErrDesired() && (weak->total < params.weak_count) )

进入权重更新 update_weights(tree)。该函数实现了四种 Boosting 的权重更新算法:Discrete AdaBoost、Real AdaBoost、Gentle AdaBoost 和 LogitBoost。这些算法在《基于子空间的人脸识别》中有详细介绍,网络上的描述则不够详细。

// Initialises or updates the per-sample boosting weights.
//
// Parameters:
//   tree  null before the first weak tree is trained: allocates the work matrices, gives
//         every sample the weight 1/n and prepares the responses for the boost type.
//         Non-null: re-weights the samples from the predictions of `tree` according to
//         params.boost_type (DISCRETE, REAL, LOGIT or GENTLE).
//
// In both cases the weights are rescaled to sum to 1 at the end, provided their sum is
// larger than FLT_EPSILON.
void CvCascadeBoost::update_weights( CvBoostTree* tree )
{
    int n = data->sample_count; // number of training samples
    double sumW = 0.;
    int step = 0;
    float* fdata = 0;
    int *sampleIdxBuf;
    const int* sampleIdx = 0;
    // Scratch space: sample indices (LOGIT / GENTLE only) plus class labels (init only).
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
                       ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
    uchar* cur_inn_buf_pos = (uchar*)inn_buf;
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        // These two boost types write regression targets into the copy of the responses.
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    }
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
        const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        double w0 = 1./n;
        double p[2] = { 1, 1 };

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );
        cvReleaseMat( &subtree_weights ); // re-created below; release any previous matrix

        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );      // 1 x n
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );        // 1 x n
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );

        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() deactivate/reactive again some, if need
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same (w0 = 1/n, since p[] is {1, 1})
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                subsample_mask->data.ptr[i] = (uchar)1;
                weights->data.db[i] = w0*p[classLabels[i]];
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            sum_response = cvCreateMat( 1, n, CV_64F );

            for( int i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );

            // run tree through all the non-processed samples
            for( int i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
                }
        }

        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i), where (x_i, y_i) is a training
            //         sample: x_i its feature representation, y_i its label
            //   C = log((1-err)/err)                 (coefficient of the weak classifier)
            //   w_i *= exp(C*(f(x_i) != y_i))        (weight update)

            double C, err = 0.;
            double scale[] = { 1., 0. };

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumW += w; // running sum of the current weights
                // orig_response holds the labels as {-1, 1}
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumW != 0 )
                err /= sumW;
            C = err = -logRatio( err );
            scale[1] = exp(err);

            sumW = 0;
            for( int i = 0; i < n; i++ )
            {
                // The comparison yields 0 for a correctly classified sample (scale[0] = 1,
                // weight unchanged) and 1 for a misclassified one (scale[1] = exp(C)).
                double w = weights->data.db[i]*
                    scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumW += w;
                weights->data.db[i] = w;
            }

            tree->scale( C ); // scale the tree's node values by C
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost (real-valued weak classifier output):
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response = F(x_i).
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses

            const double lbWeightThresh = FLT_EPSILON;
            const double lbZMax = 10.;

            for( int i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                w = MAX( w, lbWeightThresh );
                weights->data.db[i] = w;
                sumW += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                }
                else
                {
                    z = 1./(1-p);
                    fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
                }
            }
        }
        else
        {
            // Gentle AdaBoost (AdaBoost built on an additive regression model):
            //   weak_eval[i] = f(x_i) in [-1,1]; the weak classifier is obtained by a
            //   weighted least-squares regression of y_i on x_i
            //   w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE ); // no other boost type is expected here

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i]; // -y_i * f(x_i)

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumW += w;
            }
        }
    }

    // renormalize weights so that they sum to 1
    if( sumW > FLT_EPSILON )
    {
        sumW = 1./sumW;
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
    }
}

