// Constructs an ensemble of `num_trees` trees.
//   num_trees         number of Tree objects to allocate
//   output_dimension  forwarded to Tree::Init for every tree and stored in output_dimension_
//   is_leaf_constant  stored in is_leaf_constant_ (selects the covariate-only prediction path)
//   is_exponentiated  forwarded to Tree::Init for every tree and stored in is_exponentiated_
// Each tree is default-constructed and then initialised with Init(output_dimension, is_exponentiated).
41 TreeEnsemble(
int num_trees,
int output_dimension = 1,
bool is_leaf_constant =
true,
bool is_exponentiated =
false) {
43 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees);
44 for (
int i = 0; i < num_trees; i++) {
// std::make_unique avoids a naked `new`; it is the same form this class already uses
// when it rebuilds its trees from JSON.
45 trees_[i] = std::make_unique<Tree>();
46 trees_[i]->Init(output_dimension, is_exponentiated);
// Record the configuration the ensemble was built with.
49 num_trees_ = num_trees;
50 output_dimension_ = output_dimension;
51 is_leaf_constant_ = is_leaf_constant;
52 is_exponentiated_ = is_exponentiated;
62 num_trees_ = ensemble.num_trees_;
63 output_dimension_ = ensemble.output_dimension_;
64 is_leaf_constant_ = ensemble.is_leaf_constant_;
65 is_exponentiated_ = ensemble.is_exponentiated_;
67 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees_);
68 for (
int i = 0; i < num_trees_; i++) {
69 trees_[i].reset(
new Tree());
72 for (
int j = 0; j < num_trees_; j++) {
87 int old_num_trees = num_trees_;
88 num_trees_ += ensemble.num_trees_;
89 CHECK_EQ(output_dimension_, ensemble.output_dimension_);
90 CHECK_EQ(is_leaf_constant_, ensemble.is_leaf_constant_);
91 CHECK_EQ(is_exponentiated_, ensemble.is_exponentiated_);
93 trees_.resize(num_trees_);
94 for (
int i = old_num_trees; i < num_trees_; i++) {
95 trees_[i].reset(
new Tree());
98 for (
int j = 0; j < ensemble.num_trees_; j++) {
110 for (
int j = 0; j < num_trees_; j++) {
122 for (
int j = 0; j < num_trees_; j++) {
135 return trees_[i].get();
142 for (
int i = 0; i < num_trees_; i++) {
154 trees_[i].reset(
new Tree());
164 trees_[i].reset(
new Tree());
165 trees_[i]->Init(output_dimension_, is_exponentiated_);
175 return trees_[i]->CloneFromTree(tree);
187 num_trees_ = ensemble.num_trees_;
188 output_dimension_ = ensemble.output_dimension_;
189 is_leaf_constant_ = ensemble.is_leaf_constant_;
190 is_exponentiated_ = ensemble.is_exponentiated_;
192 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees_);
193 for (
int i = 0; i < num_trees_; i++) {
194 trees_[i].reset(
new Tree());
197 for (
int j = 0; j < num_trees_; j++) {
205 std::vector<double> output(n);
206 PredictInplace(dataset, output, 0);
// Allocates a buffer of dataset.NumObservations() * output_dimension_ doubles and fills it
// through PredictRawInplace(dataset, output, 0).
// NOTE(review): the return statement is not visible in this excerpt — presumably returns `output`; confirm.
210 std::vector<double> PredictRaw(ForestDataset& dataset) {
211 data_size_t n = dataset.NumObservations();
212 data_size_t total_output_size = n * output_dimension_;
213 std::vector<double> output(total_output_size);
214 PredictRawInplace(dataset, output, 0);
// Convenience overload: predicts with the full tree range [0, trees_.size()) and forwards
// `offset` unchanged to the range-based dataset overload.
// Note that trees_.size() (unsigned) is passed into an `int tree_end` parameter.
218 inline void PredictInplace(ForestDataset& dataset, std::vector<double> &output, data_size_t offset = 0) {
219 PredictInplace(dataset, output, 0, trees_.size(), offset);
// Dispatches a dataset prediction over trees [tree_begin, tree_end).
// When is_leaf_constant_ is set, only the covariates are passed on; the other visible call
// first CHECKs dataset.HasBasis() and then passes covariates and basis together.
// NOTE(review): the separator between the two calls is not visible in this excerpt —
// presumably an `else`; confirm.
222 inline void PredictInplace(ForestDataset& dataset, std::vector<double> &output,
223 int tree_begin,
int tree_end, data_size_t offset = 0) {
224 if (is_leaf_constant_) {
225 PredictInplace(dataset.GetCovariates(), output, tree_begin, tree_end, offset);
227 CHECK(dataset.HasBasis());
228 PredictInplace(dataset.GetCovariates(), dataset.GetBasis(), output, tree_begin, tree_end, offset);
// Convenience overload for covariates + basis: uses the full tree range [0, trees_.size())
// and forwards `offset` unchanged to the range-based overload.
232 inline void PredictInplace(Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis, std::vector<double> &output, data_size_t offset = 0) {
233 PredictInplace(covariates, basis, output, 0, trees_.size(), offset);
// Basis-weighted prediction over trees [tree_begin, tree_end).
// For row i, `pred` accumulates tree.LeafValue(nidx, k) * basis(i, k) over the selected trees
// and over k in [0, output_dimension_); the result is written to output[i + offset],
// passed through std::exp first when is_exponentiated_ is set.
// Checks performed before predicting:
//   - covariates and basis have the same number of rows
//   - output_dimension_ equals the first tree's OutputDimension() and basis.cols()
//   - output holds at least covariates.rows() + offset entries, otherwise Log::Fatal
// NOTE(review): the declaration/reset of `pred` and the computation of `nidx` are not visible
// in this excerpt.
// NOTE(review): trees_[0] is dereferenced unconditionally — assumes a non-empty ensemble; confirm.
// NOTE(review): `j` is size_t while tree_begin/tree_end are int, so the loop bound is a
// signed/unsigned comparison; no check of the range against trees_.size() is visible here.
236 inline void PredictInplace(Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis, std::vector<double> &output,
237 int tree_begin,
int tree_end, data_size_t offset = 0) {
239 CHECK_EQ(covariates.rows(), basis.rows());
240 CHECK_EQ(output_dimension_, trees_[0]->OutputDimension());
241 CHECK_EQ(output_dimension_, basis.cols());
242 data_size_t n = covariates.rows();
243 data_size_t total_output_size = n;
244 if (output.size() < total_output_size + offset) {
245 Log::Fatal(
"Mismatched size of prediction vector and training data");
247 for (data_size_t i = 0; i < n; i++) {
249 for (
size_t j = tree_begin; j < tree_end; j++) {
250 auto &tree = *trees_[j];
252 for (int32_t k = 0; k < output_dimension_; k++) {
253 pred += tree.LeafValue(nidx, k) * basis(i, k);
256 if (is_exponentiated_) output[i + offset] = std::exp(pred);
257 else output[i + offset] = pred;
// Convenience overload for covariate-only prediction: uses the full tree range
// [0, trees_.size()) and forwards `offset` unchanged to the range-based overload.
261 inline void PredictInplace(Eigen::MatrixXd& covariates, std::vector<double> &output, data_size_t offset = 0) {
262 PredictInplace(covariates, output, 0, trees_.size(), offset);
// Covariate-only prediction over trees [tree_begin, tree_end).
// For row i, `pred` accumulates tree.LeafValue(nidx, 0) over the selected trees; the result is
// written to output[i + offset], passed through std::exp first when is_exponentiated_ is set.
// Calls Log::Fatal when output holds fewer than covariates.rows() + offset entries.
// NOTE(review): the declaration/reset of `pred` and the computation of `nidx` are not visible
// in this excerpt.
// NOTE(review): `j` is size_t while tree_begin/tree_end are int; no check of the range against
// trees_.size() is visible here.
265 inline void PredictInplace(Eigen::MatrixXd& covariates, std::vector<double> &output,
int tree_begin,
int tree_end, data_size_t offset = 0) {
267 data_size_t n = covariates.rows();
268 data_size_t total_output_size = n;
269 if (output.size() < total_output_size + offset) {
270 Log::Fatal(
"Mismatched size of prediction vector and training data");
272 for (data_size_t i = 0; i < n; i++) {
274 for (
size_t j = tree_begin; j < tree_end; j++) {
275 auto &tree = *trees_[j];
277 pred += tree.LeafValue(nidx, 0);
279 if (is_exponentiated_) output[i + offset] = std::exp(pred);
280 else output[i + offset] = pred;
// Convenience overload for raw prediction: uses the full tree range [0, trees_.size())
// and forwards `offset` unchanged to the range-based overload.
284 inline void PredictRawInplace(ForestDataset& dataset, std::vector<double> &output, data_size_t offset = 0) {
285 PredictRawInplace(dataset, output, 0, trees_.size(), offset);
// Raw (per-component) prediction over trees [tree_begin, tree_end).
// For row i and component k, `pred` accumulates tree.LeafValue(nidx, k) over the selected trees
// and is written to output[i*output_dimension_ + k + offset], i.e. the components of one row
// are stored next to each other. No basis weighting or exponentiation is applied here.
// Checks performed before predicting:
//   - output_dimension_ equals the first tree's OutputDimension()
//   - output holds at least rows * output_dimension_ + offset entries, otherwise Log::Fatal
// NOTE(review): the declaration/reset of `pred` and the computation of `nidx` are not visible
// in this excerpt.
// NOTE(review): trees_[0] is dereferenced unconditionally — assumes a non-empty ensemble; confirm.
288 inline void PredictRawInplace(ForestDataset& dataset, std::vector<double> &output,
289 int tree_begin,
int tree_end, data_size_t offset = 0) {
// Bind by reference: taking the matrix by value deep-copied every covariate on each call.
// The dataset overload of PredictInplace already passes GetCovariates() straight into an
// Eigen::MatrixXd& parameter, so the accessor yields an lvalue that can be bound here.
291 Eigen::MatrixXd& covariates = dataset.GetCovariates();
292 CHECK_EQ(output_dimension_, trees_[0]->OutputDimension());
293 data_size_t n = covariates.rows();
294 data_size_t total_output_size = n * output_dimension_;
295 if (output.size() < total_output_size + offset) {
296 Log::Fatal(
"Mismatched size of raw prediction vector and training data");
298 for (data_size_t i = 0; i < n; i++) {
299 for (int32_t k = 0; k < output_dimension_; k++) {
301 for (
size_t j = tree_begin; j < tree_end; j++) {
302 auto &tree = *trees_[j];
304 pred += tree.LeafValue(nidx, k);
306 output[i*output_dimension_ + k + offset] = pred;
311 inline int32_t NumTrees() {
// Adds up trees_[i]->NumLeaves() over all num_trees_ trees.
// NOTE(review): the declaration of `result` and the return statement are not visible in this
// excerpt — presumably starts at 0 and is returned; confirm.
315 inline int32_t NumLeaves() {
317 for (
int i = 0; i < num_trees_; i++) {
318 result += trees_[i]->NumLeaves();
// Adds up trees_[i]->SumSquaredLeafValues() over all num_trees_ trees.
// NOTE(review): the declaration of `result` and the return statement are not visible in this
// excerpt — presumably starts at 0 and is returned; confirm.
323 inline double SumLeafSquared() {
325 for (
int i = 0; i < num_trees_; i++) {
326 result += trees_[i]->SumSquaredLeafValues();
// Returns the stored output dimension (output_dimension_).
331 inline int32_t OutputDimension() {
332 return output_dimension_;
// Returns is_leaf_constant_, the flag that makes dataset prediction use covariates only.
335 inline bool IsLeafConstant() {
336 return is_leaf_constant_;
// Returns is_exponentiated_, the flag that makes PredictInplace apply std::exp to its output.
339 inline bool IsExponentiated() {
340 return is_exponentiated_;
// Returns MaxLeafDepth() of the tree at index `tree_num`.
// `tree_num` is used to index trees_ directly; no bounds check is performed here.
343 inline int32_t TreeMaxDepth(
int tree_num) {
344 return trees_[tree_num]->MaxLeafDepth();
// Returns numerator / denominator, where numerator is the sum of TreeMaxDepth(i) over all
// num_trees_ trees.
// NOTE(review): the statement that updates `denominator` is not visible in this excerpt —
// presumably it counts trees. If it is still 0 at the division (e.g. an ensemble with no
// trees), the result is 0.0/0.0, i.e. NaN; confirm whether callers can hit that case.
347 inline double AverageMaxDepth() {
348 double numerator = 0.;
349 double denominator = 0.;
350 for (
int i = 0; i < num_trees_; i++) {
351 numerator +=
static_cast<double>(TreeMaxDepth(i));
354 return numerator / denominator;
// Walks all num_trees_ trees and tests each with IsRoot().
// NOTE(review): the return statements are not visible in this excerpt — presumably false on
// the first tree that is not a root and true otherwise; confirm.
357 inline bool AllRoots() {
358 for (
int i = 0; i < num_trees_; i++) {
359 if (!trees_[i]->IsRoot()) {
// Calls SetLeaf(0, leaf_value) on every tree in the ensemble.
// Requires output_dimension_ == 1 and, for each tree, IsRoot(); both are enforced with CHECKs.
// The first argument 0 is presumably the root node id — confirm against Tree::SetLeaf.
366 inline void SetLeafValue(
double leaf_value) {
367 CHECK_EQ(output_dimension_, 1);
368 for (
int i = 0; i < num_trees_; i++) {
369 CHECK(trees_[i]->IsRoot());
370 trees_[i]->SetLeaf(0, leaf_value);
// Calls SetLeafVector(0, leaf_vector) on every tree in the ensemble.
// Requires leaf_vector.size() == output_dimension_ and, for each tree, IsRoot(); both are
// enforced with CHECKs. The size check compares an int member with an unsigned size().
// The first argument 0 is presumably the root node id — confirm against Tree::SetLeafVector.
374 inline void SetLeafVector(std::vector<double>& leaf_vector) {
375 CHECK_EQ(output_dimension_, leaf_vector.size());
376 for (
int i = 0; i < num_trees_; i++) {
377 CHECK(trees_[i]->IsRoot());
378 trees_[i]->SetLeafVector(0, leaf_vector);
389 for (
int j = 0; j < num_trees_; j++) {
390 auto &tree = *trees_[j];
391 max_leaf += tree.NumLeaves();
// Writes leaf-index predictions for the first `num_trees` trees into `output`.
// Each tree j is asked to fill its part via
// tree.PredictLeafIndexInplace(covariates, output, offset, max_leaf), after which max_leaf is
// advanced by that tree's NumLeaves(), so each tree is handed a different starting max_leaf.
// CHECKs that output has room for at least num_trees * n entries.
// NOTE(review): the initialisation of `offset` and `max_leaf`, and any update of `offset`
// between trees, are not visible in this excerpt.
// NOTE(review): `num_trees` is a parameter and is not compared with trees_.size() in the
// visible lines; trees_[j] is indexed directly — confirm callers pass a valid count.
434 void PredictLeafIndicesInplace(Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>& covariates, std::vector<int32_t>& output,
int num_trees, data_size_t n) {
435 CHECK_GE(output.size(), num_trees*n);
438 for (
int j = 0; j < num_trees; j++) {
439 auto &tree = *trees_[j];
440 int num_leaves = tree.NumLeaves();
441 tree.PredictLeafIndexInplace(covariates, output, offset, max_leaf);
443 max_leaf += num_leaves;
466 Eigen::Map<Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>& output,
467 int column_ind,
int num_trees, data_size_t n) {
468 CHECK_GE(output.size(), num_trees*n);
471 for (
int j = 0; j < num_trees; j++) {
472 auto &tree = *trees_[j];
473 int num_leaves = tree.NumLeaves();
474 tree.PredictLeafIndexInplace(covariates, output, column_ind, offset, max_leaf);
476 max_leaf += num_leaves;
498 CHECK_GE(output.size(), num_trees*n);
501 for (
int j = 0; j < num_trees; j++) {
502 auto &tree = *trees_[j];
503 int num_leaves = tree.NumLeaves();
504 tree.PredictLeafIndexInplace(covariates, output, offset, max_leaf);
506 max_leaf += num_leaves;
515 int num_trees = num_trees_;
517 std::vector<int32_t> output(n*num_trees);
525 result_obj.emplace(
"num_trees", this->num_trees_);
526 result_obj.emplace(
"output_dimension", this->output_dimension_);
527 result_obj.emplace(
"is_leaf_constant", this->is_leaf_constant_);
528 result_obj.emplace(
"is_exponentiated", this->is_exponentiated_);
530 std::string tree_label;
531 for (
int i = 0; i < trees_.size(); i++) {
532 tree_label =
"tree_" + std::to_string(i);
533 result_obj.emplace(tree_label, trees_[i]->
to_json());
541 this->num_trees_ = ensemble_json.at(
"num_trees");
542 this->output_dimension_ = ensemble_json.at(
"output_dimension");
543 this->is_leaf_constant_ = ensemble_json.at(
"is_leaf_constant");
544 this->is_exponentiated_ = ensemble_json.at(
"is_exponentiated");
546 std::string tree_label;
548 trees_.resize(this->num_trees_);
549 for (
int i = 0; i < this->num_trees_; i++) {
550 tree_label =
"tree_" + std::to_string(i);
551 trees_[i] = std::make_unique<Tree>();
552 trees_[i]->from_json(ensemble_json.at(tree_label));
// Trees owned by the ensemble, one unique_ptr per tree.
557 std::vector<std::unique_ptr<Tree>> trees_;
// Output dimension passed to Tree::Init and checked against tree/basis dimensions at prediction time.
559 int output_dimension_;
// When true, dataset prediction uses covariates only; otherwise a basis is required.
560 bool is_leaf_constant_;
// When true, PredictInplace writes std::exp(pred) instead of pred.
561 bool is_exponentiated_;