Monday, 11 November 2013

machine learning--02 : Estimate the batch gradient descent is converge or not

    This is an exercise from the open course of machine learning, one of the purpose of this exercise is measure the batch gradient descent is converge or not.




    Equation 1 is the cost function, the procedures of measuring the convergence of gradient is compare different learning rate of the gradient descent, draw the graph and make sure the value of the cost function is decrease on every iteration.

    At first, I implement the cost function and batch gradient descent.


/**
 *@brief calculate the theta generated by the cost function 
 * of the linear regression
 */
template<typename T>
T cost_function(cv::Mat_<T> const &features, 
cv::Mat_<T> const &labels, 
cv::Mat_<T> const &theta)
{
    cv::Mat_<T> const temp = features * theta - labels;
    cv::Mat_<T> results = temp.t() * temp;
    results /= 2 * labels.rows;

    return *results.template ptr<T>(0);
}

/**
 *@brief find the costs of each iteration
 *@param features input sequence
 *@param labels output sequence
 *@param alpha determine the step of each iteration, smaller alpha would 
 * cause longer time to iterate but with higher chance to converge;
 * larger a;pha will run faster but with higher chance to divert.
 * Since this function has a fixed iteration time, so alpha 
 * only affect accuracy.
 *@param iterate iterate time
 *@return the costs of each iteration
 */
template<typename T>
cv::Mat_<T> batch_gradient_descent_cost(cv::Mat_<T> const &features, 
cv::Mat_<T> const &labels, 
T alpha = 0.07, size_t iterate = 1)
{
    cv::Mat_<T> theta = cv::Mat_<T>::zeros(features.cols, 1);
    cv::Mat_<T> results;
    T const ratio = alpha / features.rows;
    for(size_t i = 0; i != iterate; ++i){
        cv::Mat_<T> const data = ratio * 
                linear_regression(features, labels, theta);
        results.push_back(cost_function(features, labels, theta));
        theta -= data;
    }

    return results;
}

Draw the cost vs iteration graph by qwt.


/**
 * @brief draw and show the charts of cost function vs iteration
 * @param plot a 2-d plotting widget
 * @param features input(features)
 * @param labels output(target)
 * @param a
 */
template<typename T>
void cost_vs_iteration(simple2DPlot &plot, cv::Mat_<T> const &features, 
cv::Mat_<T> const &labels, QApplication &a)
{
    size_t const iterate_times = 50;
    T const ratio[] = {0.01, 0.03, 0.1, 0.3, 1, 1.2};
    QColor const colors[] = { {255, 0, 0}, {0, 255, 0}, {0, 0, 255}, 
                              {255, 255, 0}, {255, 128, 128}, 
                              {128, 0, 128}};
    std::vector<T> iterates(50);
    std::iota(std::begin(iterates), std::end(iterates), 1);
    for(size_t i = 0; i != sizeof(ratio) / sizeof(T); ++i){
        cv::Mat_<T> const costs = batch_gradient_descent_cost<T>(features, 
                                       labels, ratio[i], iterate_times);
        plot.insert_curve(std::begin(iterates), std::end(iterates), 
                          std::begin(costs));
        plot.get_curve(i).setTitle(
          QString::fromStdString(std::to_string(ratio[i])));
        plot.get_curve(i).setPen(colors[i]);
        plot.get_curve(i).setStyle(QwtPlotCurve::Lines);
    }

    plot.replot();
    plot.render_document("costs vs iteration", {300, 200});

    plot.resize(600, 400);
    plot.show();
    a.exec();

    plot.clear();
    cv::Mat_<T> const costs = 
    batch_gradient_descent_cost<T>(features, labels, 1.3, iterate_times);
    plot.insert_curve(std::begin(iterates), std::end(iterates), 
                      std::begin(costs));
    plot.get_curve(0).setTitle("1.3");
    plot.get_curve(0).setStyle(QwtPlotCurve::Lines);
    plot.render_document("diverge", {300, 200});

    plot.replot();
    plot.show();

    a.exec();
}


graph-00 : converge

graph-01 : diverge


 The implementation of simple2DPlot, the codes of main project.