47 TreeEnsemble(
int num_trees,
int output_dimension = 1,
bool is_leaf_constant =
true,
bool is_exponentiated =
false) {
49 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees);
50 for (
int i = 0; i < num_trees; i++) {
51 trees_[i].reset(
new Tree());
52 trees_[i]->Init(output_dimension, is_exponentiated);
55 num_trees_ = num_trees;
56 output_dimension_ = output_dimension;
57 is_leaf_constant_ = is_leaf_constant;
58 is_exponentiated_ = is_exponentiated;
68 num_trees_ = ensemble.num_trees_;
69 output_dimension_ = ensemble.output_dimension_;
70 is_leaf_constant_ = ensemble.is_leaf_constant_;
71 is_exponentiated_ = ensemble.is_exponentiated_;
73 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees_);
74 for (
int i = 0; i < num_trees_; i++) {
75 trees_[i].reset(
new Tree());
78 for (
int j = 0; j < num_trees_; j++) {
93 int old_num_trees = num_trees_;
94 num_trees_ += ensemble.num_trees_;
95 CHECK_EQ(output_dimension_, ensemble.output_dimension_);
96 CHECK_EQ(is_leaf_constant_, ensemble.is_leaf_constant_);
97 CHECK_EQ(is_exponentiated_, ensemble.is_exponentiated_);
99 trees_.resize(num_trees_);
100 for (
int i = old_num_trees; i < num_trees_; i++) {
101 trees_[i].reset(
new Tree());
104 for (
int j = 0; j < ensemble.num_trees_; j++) {
116 for (
int j = 0; j < num_trees_; j++) {
128 for (
int j = 0; j < num_trees_; j++) {
141 return trees_[i].get();
148 for (
int i = 0; i < num_trees_; i++) {
160 trees_[i].reset(
new Tree());
170 trees_[i].reset(
new Tree());
171 trees_[i]->Init(output_dimension_, is_exponentiated_);
181 return trees_[i]->CloneFromTree(tree);
193 num_trees_ = ensemble.num_trees_;
194 output_dimension_ = ensemble.output_dimension_;
195 is_leaf_constant_ = ensemble.is_leaf_constant_;
196 is_exponentiated_ = ensemble.is_exponentiated_;
198 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees_);
199 for (
int i = 0; i < num_trees_; i++) {
200 trees_[i].reset(
new Tree());
203 for (
int j = 0; j < num_trees_; j++) {
211 std::vector<double> output(n);
212 PredictInplace(dataset, output, 0);
// Allocates a vector with NumObservations() * output_dimension_ entries and fills it
// by calling PredictRawInplace on `dataset` with offset 0.
// NOTE(review): the return statement is not visible in this excerpt; presumably
// `output` is returned — confirm.
216 std::vector<double> PredictRaw(ForestDataset& dataset) {
217 data_size_t n = dataset.NumObservations();
// One slot per (observation, output dimension) pair
218 data_size_t total_output_size = n * output_dimension_;
219 std::vector<double> output(total_output_size);
220 PredictRawInplace(dataset, output, 0);
224 inline void PredictInplace(ForestDataset& dataset, std::vector<double> &output, data_size_t offset = 0) {
225 PredictInplace(dataset, output, 0, trees_.size(), offset);
228 inline void PredictInplace(ForestDataset& dataset, std::vector<double> &output,
229 int tree_begin,
int tree_end, data_size_t offset = 0) {
230 if (is_leaf_constant_) {
231 PredictInplace(dataset.GetCovariates(), output, tree_begin, tree_end, offset);
233 CHECK(dataset.HasBasis());
234 PredictInplace(dataset.GetCovariates(), dataset.GetBasis(), output, tree_begin, tree_end, offset);
238 inline void PredictInplace(Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis, std::vector<double> &output, data_size_t offset = 0) {
239 PredictInplace(covariates, basis, output, 0, trees_.size(), offset);
// Writes one prediction per row of `covariates` into `output`, starting at index
// `offset`, using the trees with indices in [tree_begin, tree_end). Each tree adds the
// sum over k of LeafValue(nidx, k) * basis(i, k); the result is passed through std::exp
// when is_exponentiated_ is set.
// NOTE(review): the declarations of `pred` and `nidx` are not visible in this excerpt;
// presumably `pred` is reset for each row and `nidx` is the leaf reached by row i — confirm.
242 inline void PredictInplace(Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis, std::vector<double> &output,
243 int tree_begin,
int tree_end, data_size_t offset = 0) {
// Covariates and basis must have the same number of rows
245 CHECK_EQ(covariates.rows(), basis.rows());
// NOTE(review): reads trees_[0], so this assumes the ensemble holds at least one tree
246 CHECK_EQ(output_dimension_, trees_[0]->OutputDimension());
// The basis needs one column per leaf output dimension
247 CHECK_EQ(output_dimension_, basis.cols());
248 data_size_t n = covariates.rows();
249 data_size_t total_output_size = n;
// Refuse to write past the end of `output`
250 if (output.size() < total_output_size + offset) {
251 Log::Fatal(
"Mismatched size of prediction vector and training data");
253 for (data_size_t i = 0; i < n; i++) {
// NOTE(review): j is size_t while tree_begin / tree_end are int (signed/unsigned comparison)
255 for (
size_t j = tree_begin; j < tree_end; j++) {
256 auto &tree = *trees_[j];
258 for (int32_t k = 0; k < output_dimension_; k++) {
259 pred += tree.LeafValue(nidx, k) * basis(i, k);
// Store exp(pred) for exponentiated ensembles, the plain value otherwise
262 if (is_exponentiated_) output[i + offset] = std::exp(pred);
263 else output[i + offset] = pred;
267 inline void PredictInplace(Eigen::MatrixXd& covariates, std::vector<double> &output, data_size_t offset = 0) {
268 PredictInplace(covariates, output, 0, trees_.size(), offset);
// Writes one prediction per row of `covariates` into `output`, starting at index
// `offset`, using the trees with indices in [tree_begin, tree_end). Each tree adds
// LeafValue(nidx, 0); the result is passed through std::exp when is_exponentiated_ is set.
// NOTE(review): the declarations of `pred` and `nidx` are not visible in this excerpt;
// presumably `pred` is reset for each row and `nidx` is the leaf reached by row i — confirm.
271 inline void PredictInplace(Eigen::MatrixXd& covariates, std::vector<double> &output,
int tree_begin,
int tree_end, data_size_t offset = 0) {
273 data_size_t n = covariates.rows();
274 data_size_t total_output_size = n;
// Refuse to write past the end of `output`
275 if (output.size() < total_output_size + offset) {
276 Log::Fatal(
"Mismatched size of prediction vector and training data");
278 for (data_size_t i = 0; i < n; i++) {
// NOTE(review): j is size_t while tree_begin / tree_end are int (signed/unsigned comparison)
280 for (
size_t j = tree_begin; j < tree_end; j++) {
281 auto &tree = *trees_[j];
// Only output dimension 0 of the leaf is read here
283 pred += tree.LeafValue(nidx, 0);
// Store exp(pred) for exponentiated ensembles, the plain value otherwise
285 if (is_exponentiated_) output[i + offset] = std::exp(pred);
286 else output[i + offset] = pred;
290 inline void PredictRawInplace(ForestDataset& dataset, std::vector<double> &output, data_size_t offset = 0) {
291 PredictRawInplace(dataset, output, 0, trees_.size(), offset);
// Writes output_dimension_ values per observation into `output`, using the trees with
// indices in [tree_begin, tree_end). The value for observation i and dimension k is
// stored at index i*output_dimension_ + k + offset. No basis is applied and no
// exponentiation is performed in the visible lines.
// NOTE(review): the declarations of `pred` and `nidx` are not visible in this excerpt;
// presumably `pred` is reset for each (i, k) pair and `nidx` is the leaf reached by
// row i — confirm.
294 inline void PredictRawInplace(ForestDataset& dataset, std::vector<double> &output,
295 int tree_begin,
int tree_end, data_size_t offset = 0) {
// NOTE(review): `covariates` is declared by value, so this appears to copy the whole
// covariate matrix on each call; the sibling overloads take it by reference.
297 Eigen::MatrixXd covariates = dataset.GetCovariates();
// NOTE(review): reads trees_[0], so this assumes the ensemble holds at least one tree
298 CHECK_EQ(output_dimension_, trees_[0]->OutputDimension());
299 data_size_t n = covariates.rows();
300 data_size_t total_output_size = n * output_dimension_;
// Refuse to write past the end of `output`
301 if (output.size() < total_output_size + offset) {
302 Log::Fatal(
"Mismatched size of raw prediction vector and training data");
304 for (data_size_t i = 0; i < n; i++) {
305 for (int32_t k = 0; k < output_dimension_; k++) {
// NOTE(review): j is size_t while tree_begin / tree_end are int (signed/unsigned comparison)
307 for (
size_t j = tree_begin; j < tree_end; j++) {
308 auto &tree = *trees_[j];
310 pred += tree.LeafValue(nidx, k);
// Values for one observation are stored contiguously
312 output[i*output_dimension_ + k + offset] = pred;
// Accessor for the ensemble's tree count.
// NOTE(review): the body is not visible in this excerpt; presumably it returns
// num_trees_ — confirm.
317 inline int32_t NumTrees() {
// Adds up NumLeaves() over the first num_trees_ trees.
// NOTE(review): the declaration and return of `result` are not visible in this
// excerpt; presumably it starts at 0 and is returned — confirm.
321 inline int32_t NumLeaves() {
323 for (
int i = 0; i < num_trees_; i++) {
324 result += trees_[i]->NumLeaves();
// Adds up SumSquaredLeafValues() over the first num_trees_ trees.
// NOTE(review): the declaration and return of `result` are not visible in this
// excerpt; presumably it starts at 0 and is returned — confirm.
329 inline double SumLeafSquared() {
331 for (
int i = 0; i < num_trees_; i++) {
332 result += trees_[i]->SumSquaredLeafValues();
337 inline int32_t OutputDimension() {
338 return output_dimension_;
341 inline bool IsLeafConstant() {
342 return is_leaf_constant_;
345 inline bool IsExponentiated() {
346 return is_exponentiated_;
349 inline int32_t TreeMaxDepth(
int tree_num) {
350 return trees_[tree_num]->MaxLeafDepth();
// Sums TreeMaxDepth(i) over the first num_trees_ trees and divides by `denominator`.
// NOTE(review): the update of `denominator` is not visible in this excerpt; presumably
// it counts the trees — confirm. If it is still 0 at the division (e.g. an empty
// ensemble), the result is NaN or inf rather than an error.
353 inline double AverageMaxDepth() {
354 double numerator = 0.;
355 double denominator = 0.;
356 for (
int i = 0; i < num_trees_; i++) {
357 numerator +=
static_cast<double>(TreeMaxDepth(i));
360 return numerator / denominator;
// Walks the first num_trees_ trees and tests IsRoot() on each one.
// NOTE(review): the return statements are not visible in this excerpt; presumably
// it returns false at the first non-root tree and true otherwise — confirm.
363 inline bool AllRoots() {
364 for (
int i = 0; i < num_trees_; i++) {
365 if (!trees_[i]->IsRoot()) {
372 inline void SetLeafValue(
double leaf_value) {
373 CHECK_EQ(output_dimension_, 1);
374 for (
int i = 0; i < num_trees_; i++) {
375 CHECK(trees_[i]->IsRoot());
376 trees_[i]->SetLeaf(0, leaf_value);
380 inline void SetLeafVector(std::vector<double>& leaf_vector) {
381 CHECK_EQ(output_dimension_, leaf_vector.size());
382 for (
int i = 0; i < num_trees_; i++) {
383 CHECK(trees_[i]->IsRoot());
384 trees_[i]->SetLeafVector(0, leaf_vector);
395 for (
int j = 0; j < num_trees_; j++) {
396 auto &tree = *trees_[j];
397 max_leaf += tree.NumLeaves();
// Calls PredictLeafIndexInplace on each of the first `num_trees` trees, writing into
// `output`, which must hold at least num_trees*n entries. `max_leaf` grows by each
// tree's leaf count after that tree is processed, so every tree receives the running
// total of leaves from the trees before it.
// NOTE(review): the declarations of `offset` and `max_leaf`, and any update of
// `offset`, are not visible in this excerpt — confirm their initial values and how
// `offset` advances between trees.
440 void PredictLeafIndicesInplace(Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>& covariates, std::vector<int32_t>& output,
int num_trees, data_size_t n) {
// NOTE(review): compares an unsigned size() against a signed product
441 CHECK_GE(output.size(), num_trees*n);
444 for (
int j = 0; j < num_trees; j++) {
445 auto &tree = *trees_[j];
446 int num_leaves = tree.NumLeaves();
447 tree.PredictLeafIndexInplace(covariates, output, offset, max_leaf);
449 max_leaf += num_leaves;
472 Eigen::Map<Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>& output,
473 int column_ind,
int num_trees, data_size_t n) {
474 CHECK_GE(output.size(), num_trees*n);
477 for (
int j = 0; j < num_trees; j++) {
478 auto &tree = *trees_[j];
479 int num_leaves = tree.NumLeaves();
480 tree.PredictLeafIndexInplace(covariates, output, column_ind, offset, max_leaf);
482 max_leaf += num_leaves;
504 CHECK_GE(output.size(), num_trees*n);
507 for (
int j = 0; j < num_trees; j++) {
508 auto &tree = *trees_[j];
509 int num_leaves = tree.NumLeaves();
510 tree.PredictLeafIndexInplace(covariates, output, offset, max_leaf);
512 max_leaf += num_leaves;
521 int num_trees = num_trees_;
523 std::vector<int32_t> output(n*num_trees);
531 result_obj.emplace(
"num_trees", this->num_trees_);
532 result_obj.emplace(
"output_dimension", this->output_dimension_);
533 result_obj.emplace(
"is_leaf_constant", this->is_leaf_constant_);
534 result_obj.emplace(
"is_exponentiated", this->is_exponentiated_);
536 std::string tree_label;
537 for (
int i = 0; i < trees_.size(); i++) {
538 tree_label =
"tree_" + std::to_string(i);
539 result_obj.emplace(tree_label, trees_[i]->
to_json());
547 this->num_trees_ = ensemble_json.at(
"num_trees");
548 this->output_dimension_ = ensemble_json.at(
"output_dimension");
549 this->is_leaf_constant_ = ensemble_json.at(
"is_leaf_constant");
550 this->is_exponentiated_ = ensemble_json.at(
"is_exponentiated");
552 std::string tree_label;
554 trees_.resize(this->num_trees_);
555 for (
int i = 0; i < this->num_trees_; i++) {
556 tree_label =
"tree_" + std::to_string(i);
557 trees_[i] = std::make_unique<Tree>();
558 trees_[i]->from_json(ensemble_json.at(tree_label));
// Owning pointers to the trees of the ensemble
563 std::vector<std::unique_ptr<Tree>> trees_;
// Leaf output dimension; passed to Tree::Init and checked against the basis width when predicting
565 int output_dimension_;
// When true, dataset prediction uses covariates only; otherwise a basis is required
566 bool is_leaf_constant_;
// When true, PredictInplace stores std::exp of the summed prediction
567 bool is_exponentiated_;