First of all, to perform the classification task, we need a train function that fits the model to the training data.
/**
 * @brief Train on the input data with the softmax algorithm
 * @param train Training data; the input contains one
 * training example per column
 * @param labels The label of each training example
 */
template<typename T>
void softmax<T>::train(const Eigen::Ref<const EigenMat> &train,
                       const std::vector<int> &labels)
{
    //#1 generate unique labels, because we need the
    //NumClass and generate the ground truth table
    auto const UniqueLabels = get_unique_labels(labels);
    auto const NumClass = UniqueLabels.size();

    //#2 initialize weight and gradient
    weight_ = EigenMat::Random(NumClass, train.rows());
    grad_ = EigenMat::Zero(NumClass, train.rows());

    //#3 initialize ground truth
    auto const TrainCols = static_cast<int>(train.cols());
    EigenMat const GroundTruth = get_ground_truth(NumClass, TrainCols,
                                                  UniqueLabels, labels);

    //#4 create the random generator for the mini-batch algorithm
    std::random_device rd;
    std::default_random_engine re(rd());
    int const Batch = get_batch_size(TrainCols);
    int const RandomSize = TrainCols != Batch ?
                TrainCols - Batch - 1 : 0;
    std::uniform_int_distribution<int> uni_int(0, RandomSize);
    for(size_t i = 0; i != params_.max_iter_; ++i){
        auto const Cols = uni_int(re);
        auto const &TrainBlock = train.block(0, Cols, train.rows(), Batch);
        auto const &GTBlock = GroundTruth.block(0, Cols, NumClass, Batch);

        //#5 compute the value of the cost function
        auto const Cost = compute_cost(TrainBlock, weight_, GTBlock);

        //#6 break the loop if the convergence criteria are met
        if(std::abs(params_.cost_ - Cost) < params_.epsillon_ ||
                Cost < 0){
            break;
        }
        params_.cost_ = Cost;

        //#7 compute gradient
        compute_gradient(TrainBlock, weight_, GTBlock);

        //#8 update weight
        weight_.array() -= grad_.array() * params_.lrate_;
    }
}
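The helpers get_unique_labels and get_ground_truth are not shown in this section. As a rough sketch of what they could look like (written as free functions on Eigen::MatrixXd instead of the class's templated EigenMat, so the signatures here are my own), get_ground_truth builds a NumClass x TrainCols one-hot matrix whose entry (j, i) is 1 when sample i carries the j-th unique label:

#include <Eigen/Dense>

#include <set>
#include <vector>

//collect the distinct labels in sorted order
std::vector<int> get_unique_labels(std::vector<int> const &labels)
{
    std::set<int> const unique(labels.begin(), labels.end());
    return std::vector<int>(unique.begin(), unique.end());
}

//one-hot encode the labels: ground_truth(j, i) == 1 iff labels[i] == unique_labels[j]
Eigen::MatrixXd get_ground_truth(int num_class, int train_cols,
                                 std::vector<int> const &unique_labels,
                                 std::vector<int> const &labels)
{
    Eigen::MatrixXd ground_truth = Eigen::MatrixXd::Zero(num_class, train_cols);
    for(int i = 0; i != train_cols; ++i){
        for(int j = 0; j != num_class; ++j){
            if(labels[i] == unique_labels[j]){
                ground_truth(j, i) = 1.0;
                break;
            }
        }
    }
    return ground_truth;
}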
The most complicated parts are #5 and #7; the other parts are trivial. To make #5 work, I need to implement the cost function (graph_00) and gradient descent (graph_01).
graph_00: the softmax regression cost function
graph_01: the gradient of the cost function
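Since the images themselves did not survive, and assuming they showed the usual formulas, graph_00 is the standard softmax regression cost with L2 weight decay and graph_01 its gradient with respect to the weight row $w_j$ of class $j$; this is what compute_cost and compute_gradient below evaluate, with $m$ the number of samples in the batch, $k$ the number of classes and $\lambda$ the weight decay parameter:

\[
J(W) = -\frac{1}{m}\sum_{i=1}^{m}\sum_{j=1}^{k}
       1\{y^{(i)} = j\}\,
       \log\frac{e^{w_j^{T}x^{(i)}}}{\sum_{l=1}^{k} e^{w_l^{T}x^{(i)}}}
       \;+\; \frac{\lambda}{2}\sum_{j=1}^{k}\lVert w_j\rVert^{2}
\]

\[
\nabla_{w_j} J(W) = -\frac{1}{m}\sum_{i=1}^{m}
       x^{(i)}\Bigl(1\{y^{(i)} = j\} - p\bigl(y^{(i)} = j \mid x^{(i)}; W\bigr)\Bigr)
       \;+\; \lambda\, w_j
\]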
Both the cost and the gradient depend on the hypothesis matrix (the per-class softmax probabilities of every sample), so I compute that first.

template<typename T>
void softmax<T>::compute_hypothesis(Eigen::Ref<const EigenMat> const &train,
                                    Eigen::Ref<const EigenMat> const &weight)
{
    hypothesis_.noalias() = weight * train;
    //subtract the max of each column before exp to avoid overflow
    max_exp_power_ = hypothesis_.colwise().maxCoeff();
    for(size_t i = 0; i != hypothesis_.cols(); ++i){
        hypothesis_.col(i).array() -= max_exp_power_(0, i);
    }

    hypothesis_ = hypothesis_.array().exp();
    //normalize every column so it sums to 1
    weight_sum_ = hypothesis_.array().colwise().sum();
    for(size_t i = 0; i != hypothesis_.cols(); ++i){
        if(weight_sum_(0, i) != T(0)){
            hypothesis_.col(i) /= weight_sum_(0, i);
        }
    }
    //prevent feeding 0 to the log function
    hypothesis_ = (hypothesis_.array() != 0).
            select(hypothesis_, T(0.1));
}
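The max_exp_power_ subtraction is the usual trick for keeping the exponentials finite. A small standalone illustration (plain C++, independent of the softmax class) of why it matters:

#include <cmath>
#include <cstdio>

int main()
{
    //large scores make std::exp overflow to inf; the shifted version does not
    double const scores[] = {1000.0, 1001.0, 1002.0};
    double const max_score = 1002.0;

    double naive_sum = 0.0, shifted_sum = 0.0;
    for(double s : scores){
        naive_sum   += std::exp(s);             //overflows to inf
        shifted_sum += std::exp(s - max_score); //stays finite
    }
    //inf / inf gives nan, while the shifted probability is about 0.09
    std::printf("naive   p0 = %f\n", std::exp(scores[0]) / naive_sum);
    std::printf("shifted p0 = %f\n", std::exp(scores[0] - max_score) / shifted_sum);
    return 0;
}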
After I have the hypothesis matrix, I can compute the cost and the gradient with ease.
template<typename T>
double softmax<T>::compute_cost(const Eigen::Ref<const EigenMat> &train,
                                const Eigen::Ref<const EigenMat> &weight,
                                const Eigen::Ref<const EigenMat> &ground_truth)
{
    compute_hypothesis(train, weight);
    double const NSamples = static_cast<double>(train.cols());
    return  -1.0 * (hypothesis_.array().log() *
                    ground_truth.array()).sum() / NSamples +
            weight.array().pow(2.0).sum() * params_.lambda_ / 2.0;
}

template<typename T>
void softmax<T>::compute_gradient(Eigen::Ref<const EigenMat> const &train,
                                  Eigen::Ref<const EigenMat> const &weight,
                                  Eigen::Ref<const EigenMat> const &ground_truth)
{
    grad_.noalias() =
            (ground_truth.array() - hypothesis_.array())
            .matrix() * train.transpose();
    auto const NSamples = static_cast<double>(train.cols());
    grad_.array() = grad_.array() / -NSamples +
            params_.lambda_ * weight.array();
}
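Prediction is not covered in this section. As a minimal sketch of how the learned weight_ matrix could be used on a new sample (predict_one is a hypothetical free function, not part of the class), it is enough to take the argmax of the raw class scores, because the softmax normalization is monotonic:

#include <Eigen/Dense>

//Hypothetical helper: return the index (into the unique label list built
//during training) of the class with the largest score weight.row(j) * sample.
template<typename T>
int predict_one(Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> const &weight,
                Eigen::Matrix<T, Eigen::Dynamic, 1> const &sample)
{
    Eigen::Matrix<T, Eigen::Dynamic, 1> const scores = weight * sample;
    Eigen::Index max_row = 0;
    scores.maxCoeff(&max_row);
    return static_cast<int>(max_row);
}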
The test results can be seen in this post.