Skip to content

Commit

Permalink
fix refactoring bug in voting parallel (microsoft#3089)
Browse files: browse the repository at this point in the history
  • Loading branch information
guolinke authored and odimka committed May 17, 2020
1 parent 036b627 commit 600b69f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 34 deletions.
51 changes: 29 additions & 22 deletions src/treelearner/feature_histogram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1147,32 +1147,15 @@ class HistogramPool {
}
}

void DynamicChangeSize(const Dataset* train_data, bool is_hist_colwise,
const Config* config, int cache_size, int total_size) {
if (feature_metas_.empty()) {
SetFeatureInfo<true, true>(train_data, config, &feature_metas_);
uint64_t bin_cnt_over_features = 0;
for (int i = 0; i < train_data->num_features(); ++i) {
bin_cnt_over_features +=
static_cast<uint64_t>(feature_metas_[i].num_bin);
}
Log::Info("Total Bins %d", bin_cnt_over_features);
}
int old_cache_size = static_cast<int>(pool_.size());
Reset(cache_size, total_size);

if (cache_size > old_cache_size) {
pool_.resize(cache_size);
data_.resize(cache_size);
}
static int GetNumTotalHistogramBins(const Dataset* train_data,
bool is_hist_colwise, std::vector<int>* offsets) {
int num_total_bin = static_cast<int>(train_data->NumTotalBin());

std::vector<int> offsets;
offsets->clear();
if (is_hist_colwise) {
int offset = 0;
for (int j = 0; j < train_data->num_features(); ++j) {
offset += train_data->SubFeatureBinOffset(j);
offsets.push_back(offset);
offsets->push_back(offset);
auto num_bin = train_data->FeatureNumBin(j);
if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) {
num_bin -= 1;
Expand All @@ -1182,13 +1165,37 @@ class HistogramPool {
} else {
num_total_bin = 1;
for (int j = 0; j < train_data->num_features(); ++j) {
offsets.push_back(num_total_bin);
offsets->push_back(num_total_bin);
num_total_bin += train_data->FeatureBinMapper(j)->num_bin();
if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) {
num_total_bin -= 1;
}
}
}
return num_total_bin;
}

void DynamicChangeSize(const Dataset* train_data, bool is_hist_colwise,
const Config* config, int cache_size, int total_size) {
if (feature_metas_.empty()) {
SetFeatureInfo<true, true>(train_data, config, &feature_metas_);
uint64_t bin_cnt_over_features = 0;
for (int i = 0; i < train_data->num_features(); ++i) {
bin_cnt_over_features +=
static_cast<uint64_t>(feature_metas_[i].num_bin);
}
Log::Info("Total Bins %d", bin_cnt_over_features);
}
int old_cache_size = static_cast<int>(pool_.size());
Reset(cache_size, total_size);

if (cache_size > old_cache_size) {
pool_.resize(cache_size);
data_.resize(cache_size);
}
std::vector<int> offsets;
int num_total_bin =
this->GetNumTotalHistogramBins(train_data, is_hist_colwise, &offsets);
OMP_INIT_EX();
#pragma omp parallel for schedule(static)
for (int i = old_cache_size; i < cache_size; ++i) {
Expand Down
19 changes: 7 additions & 12 deletions src/treelearner/voting_parallel_tree_learner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,20 +63,15 @@ void VotingParallelTreeLearner<TREELEARNER_T>::Init(const Dataset* train_data, b
// initialize histograms for global
smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
larger_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]);
auto num_total_bin = train_data->NumTotalBin();
smaller_leaf_histogram_data_.resize(num_total_bin);
larger_leaf_histogram_data_.resize(num_total_bin);
std::vector<int> offsets;
int num_total_bin = HistogramPool::GetNumTotalHistogramBins(
train_data, this->share_state_->is_colwise, &offsets);
smaller_leaf_histogram_data_.resize(num_total_bin * 2);
larger_leaf_histogram_data_.resize(num_total_bin * 2);
HistogramPool::SetFeatureInfo<true, true>(train_data, this->config_, &feature_metas_);
uint64_t offset = 0;
for (int j = 0; j < train_data->num_features(); ++j) {
offset += static_cast<uint64_t>(train_data->SubFeatureBinOffset(j));
smaller_leaf_histogram_array_global_[j].Init(smaller_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
larger_leaf_histogram_array_global_[j].Init(larger_leaf_histogram_data_.data() + offset, &feature_metas_[j]);
auto num_bin = train_data->FeatureNumBin(j);
if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) {
num_bin -= 1;
}
offset += static_cast<uint64_t>(num_bin);
smaller_leaf_histogram_array_global_[j].Init(smaller_leaf_histogram_data_.data() + offsets[j] * 2, &feature_metas_[j]);
larger_leaf_histogram_array_global_[j].Init(larger_leaf_histogram_data_.data() + offsets[j] * 2, &feature_metas_[j]);
}
}

Expand Down

0 comments on commit 600b69f

Please sign in to comment.