// Constructs an ensemble holding `num_trees` trees.
// Each slot of trees_ receives a newly allocated Tree that is initialised with
// Init(output_dimension, is_exponentiated); the four arguments are then stored
// in the matching members.
// Defaults: output_dimension = 1, is_leaf_constant = true, is_exponentiated = false.
// NOTE(review): the lines between the Init call and the member assignments are
// not visible in this listing.
// NOTE(review): `reset(new Tree())` could be `std::make_unique<Tree>()`, which
// this file already uses in its JSON-loading code.
47 TreeEnsemble(
int num_trees,
int output_dimension = 1,
bool is_leaf_constant =
true,
bool is_exponentiated =
false) {
49 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees);
50 for (
int i = 0; i < num_trees; i++) {
51 trees_[i].reset(
new Tree());
52 trees_[i]->Init(output_dimension, is_exponentiated);
55 num_trees_ = num_trees;
56 output_dimension_ = output_dimension;
57 is_leaf_constant_ = is_leaf_constant;
58 is_exponentiated_ = is_exponentiated;
// Fragment of a routine that copies from another ensemble named `ensemble`
// (its signature is not visible in this listing).
// Copies the four configuration fields, then replaces trees_ with num_trees_
// newly allocated, default-constructed Tree objects.
68 num_trees_ = ensemble.num_trees_;
69 output_dimension_ = ensemble.output_dimension_;
70 is_leaf_constant_ = ensemble.is_leaf_constant_;
71 is_exponentiated_ = ensemble.is_exponentiated_;
73 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees_);
74 for (
int i = 0; i < num_trees_; i++) {
75 trees_[i].reset(
new Tree());
// Second pass over all trees; the loop body is not visible here.
// Presumably it copies each source tree into the new slot - TODO confirm.
78 for (
int j = 0; j < num_trees_; j++) {
// Isolated statements from several methods whose signatures are not visible
// in this listing; each comment describes only the statement that follows it.

// Returns a raw, non-owning pointer to tree `i`; trees_ keeps ownership.
93 return trees_[i].get();
// Loop over every tree in the ensemble (body not visible).
100 for (
int i = 0; i < num_trees_; i++) {
// Replaces tree `i` with a newly allocated, default-constructed Tree.
112 trees_[i].reset(
new Tree());
// Replaces tree `i` with a new Tree and initialises it with this ensemble's
// output dimension and exponentiation flag.
122 trees_[i].reset(
new Tree());
123 trees_[i]->Init(output_dimension_, is_exponentiated_);
// Delegates to tree `i`'s CloneFromTree with `tree` and returns its result.
133 return trees_[i]->CloneFromTree(tree);
// Fragment of a second routine that copies from `ensemble` (signature not
// visible). It mirrors the earlier copy fragment: the configuration fields are
// copied and trees_ is rebuilt with num_trees_ default-constructed trees.
145 num_trees_ = ensemble.num_trees_;
146 output_dimension_ = ensemble.output_dimension_;
147 is_leaf_constant_ = ensemble.is_leaf_constant_;
148 is_exponentiated_ = ensemble.is_exponentiated_;
150 trees_ = std::vector<std::unique_ptr<Tree>>(num_trees_);
151 for (
int i = 0; i < num_trees_; i++) {
152 trees_[i].reset(
new Tree());
// Second pass over all trees; the loop body is not visible here.
// Presumably it copies each source tree - TODO confirm.
155 for (
int j = 0; j < num_trees_; j++) {
// Fragment of a prediction routine (signature not visible).
// Allocates an n-element buffer and fills it through PredictInplace, writing
// from offset 0. Where `n` comes from is not visible here.
163 std::vector<double> output(n);
164 PredictInplace(dataset, output, 0);
// Allocates a buffer of NumObservations() * output_dimension_ doubles and
// fills it through PredictRawInplace, writing from offset 0.
// The statement that returns the buffer is not visible in this listing.
168 std::vector<double> PredictRaw(ForestDataset& dataset) {
169 data_size_t n = dataset.NumObservations();
170 data_size_t total_output_size = n * output_dimension_;
171 std::vector<double> output(total_output_size);
172 PredictRawInplace(dataset, output, 0);
// Convenience overload: predicts with every tree (range [0, trees_.size()))
// and writes into `output` starting at `offset`.
// NOTE(review): trees_.size() (size_t) is narrowed to the `int tree_end`
// parameter of the ranged overload.
176 inline void PredictInplace(ForestDataset& dataset, std::vector<double> &output, data_size_t offset = 0) {
177 PredictInplace(dataset, output, 0, trees_.size(), offset);
// Predicts with trees in [tree_begin, tree_end), dispatching on leaf type.
// When is_leaf_constant_ is set, only the covariates are passed on. The other
// visible path requires the dataset to have a basis and passes both covariates
// and basis.
// NOTE(review): the line joining the two paths is not visible; presumably an
// `else` - TODO confirm.
180 inline void PredictInplace(ForestDataset& dataset, std::vector<double> &output,
181 int tree_begin,
int tree_end, data_size_t offset = 0) {
182 if (is_leaf_constant_) {
183 PredictInplace(dataset.GetCovariates(), output, tree_begin, tree_end, offset);
185 CHECK(dataset.HasBasis());
186 PredictInplace(dataset.GetCovariates(), dataset.GetBasis(), output, tree_begin, tree_end, offset);
// Convenience overload for covariates plus basis: uses every tree
// (range [0, trees_.size())) and writes into `output` starting at `offset`.
190 inline void PredictInplace(Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis, std::vector<double> &output, data_size_t offset = 0) {
191 PredictInplace(covariates, basis, output, 0, trees_.size(), offset);
// Basis-weighted prediction over trees [tree_begin, tree_end).
// For observation i, accumulates sum_j sum_k LeafValue(nidx, k) * basis(i, k)
// into `pred` and stores it (or exp(pred) when is_exponentiated_) at
// output[i + offset].
// NOTE(review): the declarations of `pred` and `nidx` are not visible in this
// listing; presumably nidx is the leaf reached by row i in tree j - confirm.
// NOTE(review): trees_[0] is dereferenced with no visible check that the
// ensemble is non-empty.
194 inline void PredictInplace(Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis, std::vector<double> &output,
195 int tree_begin,
int tree_end, data_size_t offset = 0) {
// Shape checks: one basis row per covariate row, one basis column per output dimension.
197 CHECK_EQ(covariates.rows(), basis.rows());
198 CHECK_EQ(output_dimension_, trees_[0]->OutputDimension());
199 CHECK_EQ(output_dimension_, basis.cols());
200 data_size_t n = covariates.rows();
201 data_size_t total_output_size = n;
// `output` must have room for n values starting at `offset`.
202 if (output.size() < total_output_size + offset) {
203 Log::Fatal(
"Mismatched size of prediction vector and training data");
205 for (data_size_t i = 0; i < n; i++) {
// NOTE(review): `size_t j` is compared against `int tree_end`; a negative
// bound would convert to a very large unsigned value.
207 for (
size_t j = tree_begin; j < tree_end; j++) {
208 auto &tree = *trees_[j];
210 for (int32_t k = 0; k < output_dimension_; k++) {
211 pred += tree.LeafValue(nidx, k) * basis(i, k);
214 if (is_exponentiated_) output[i + offset] = std::exp(pred);
215 else output[i + offset] = pred;
// Convenience overload for covariates only: uses every tree
// (range [0, trees_.size())) and writes into `output` starting at `offset`.
219 inline void PredictInplace(Eigen::MatrixXd& covariates, std::vector<double> &output, data_size_t offset = 0) {
220 PredictInplace(covariates, output, 0, trees_.size(), offset);
// Constant-leaf prediction over trees [tree_begin, tree_end).
// For observation i, sums LeafValue(nidx, 0) over the selected trees into
// `pred` and stores it (or exp(pred) when is_exponentiated_) at
// output[i + offset].
// NOTE(review): the declarations of `pred` and `nidx` are not visible in this
// listing; presumably nidx is the leaf reached by row i in tree j - confirm.
223 inline void PredictInplace(Eigen::MatrixXd& covariates, std::vector<double> &output,
int tree_begin,
int tree_end, data_size_t offset = 0) {
225 data_size_t n = covariates.rows();
226 data_size_t total_output_size = n;
// `output` must have room for n values starting at `offset`.
227 if (output.size() < total_output_size + offset) {
228 Log::Fatal(
"Mismatched size of prediction vector and training data");
230 for (data_size_t i = 0; i < n; i++) {
// NOTE(review): `size_t j` is compared against `int tree_end`; a negative
// bound would convert to a very large unsigned value.
232 for (
size_t j = tree_begin; j < tree_end; j++) {
233 auto &tree = *trees_[j];
235 pred += tree.LeafValue(nidx, 0);
237 if (is_exponentiated_) output[i + offset] = std::exp(pred);
238 else output[i + offset] = pred;
// Convenience overload: raw prediction with every tree
// (range [0, trees_.size())), writing into `output` starting at `offset`.
242 inline void PredictRawInplace(ForestDataset& dataset, std::vector<double> &output, data_size_t offset = 0) {
243 PredictRawInplace(dataset, output, 0, trees_.size(), offset);
// Raw per-dimension prediction over trees [tree_begin, tree_end).
// For observation i and output dimension k, sums LeafValue(nidx, k) over the
// selected trees and stores it at output[i*output_dimension_ + k + offset],
// so the values for one observation are contiguous. No basis weighting or
// exponentiation appears in the visible lines.
// NOTE(review): the declarations of `pred` and `nidx` are not visible in this
// listing.
246 inline void PredictRawInplace(ForestDataset& dataset, std::vector<double> &output,
247 int tree_begin,
int tree_end, data_size_t offset = 0) {
// NOTE(review): this declares a matrix by value, so the covariates are copied
// on every call. Elsewhere in this file GetCovariates() is passed straight to
// an `Eigen::MatrixXd&` parameter, so binding a reference here would avoid the
// copy - confirm and consider changing.
249 Eigen::MatrixXd covariates = dataset.GetCovariates();
250 CHECK_EQ(output_dimension_, trees_[0]->OutputDimension());
251 data_size_t n = covariates.rows();
252 data_size_t total_output_size = n * output_dimension_;
// `output` must have room for n * output_dimension_ values starting at `offset`.
253 if (output.size() < total_output_size + offset) {
254 Log::Fatal(
"Mismatched size of raw prediction vector and training data");
256 for (data_size_t i = 0; i < n; i++) {
257 for (int32_t k = 0; k < output_dimension_; k++) {
// NOTE(review): `size_t j` is compared against `int tree_end`.
259 for (
size_t j = tree_begin; j < tree_end; j++) {
260 auto &tree = *trees_[j];
262 pred += tree.LeafValue(nidx, k);
264 output[i*output_dimension_ + k + offset] = pred;
// Accessor for the tree count. The body is not visible in this listing;
// presumably it returns num_trees_ - TODO confirm.
269 inline int32_t NumTrees() {
// Adds up NumLeaves() over every tree in the ensemble.
// The declaration and return of `result` are not visible in this listing.
273 inline int32_t NumLeaves() {
275 for (
int i = 0; i < num_trees_; i++) {
276 result += trees_[i]->NumLeaves();
// Adds up SumSquaredLeafValues() over every tree in the ensemble.
// The declaration and return of `result` are not visible in this listing.
281 inline double SumLeafSquared() {
283 for (
int i = 0; i < num_trees_; i++) {
284 result += trees_[i]->SumSquaredLeafValues();
// Returns the stored output dimension (output_dimension_).
289 inline int32_t OutputDimension() {
290 return output_dimension_;
// Returns the is_leaf_constant_ flag. When it is set, PredictInplace uses the
// covariates-only prediction path.
293 inline bool IsLeafConstant() {
294 return is_leaf_constant_;
// Returns the is_exponentiated_ flag. When it is set, PredictInplace stores
// exp(pred) instead of pred.
297 inline bool IsExponentiated() {
298 return is_exponentiated_;
// Returns MaxLeafDepth() of tree `tree_num`.
// No bounds check on `tree_num` is visible here.
301 inline int32_t TreeMaxDepth(
int tree_num) {
302 return trees_[tree_num]->MaxLeafDepth();
// Returns the ratio numerator / denominator, where the numerator is the sum
// of TreeMaxDepth(i) over all trees.
// NOTE(review): the statement that updates `denominator` is not visible in
// this listing; presumably it counts trees - confirm. If it stays at 0 (for
// example with num_trees_ == 0) the division yields NaN or infinity.
305 inline double AverageMaxDepth() {
306 double numerator = 0.;
307 double denominator = 0.;
308 for (
int i = 0; i < num_trees_; i++) {
309 numerator +=
static_cast<double>(TreeMaxDepth(i));
312 return numerator / denominator;
// Scans every tree and tests whether it is not a root (per Tree::IsRoot()).
// The return statements are not visible in this listing; presumably false on
// the first non-root tree and true otherwise - TODO confirm.
315 inline bool AllRoots() {
316 for (
int i = 0; i < num_trees_; i++) {
317 if (!trees_[i]->IsRoot()) {
// Sets a scalar leaf value on every tree.
// Requires output_dimension_ == 1 and every tree to satisfy IsRoot(); both
// are enforced with CHECK macros. Calls SetLeaf(0, leaf_value) on each tree
// (0 is presumably the root node id - confirm).
324 inline void SetLeafValue(
double leaf_value) {
325 CHECK_EQ(output_dimension_, 1);
326 for (
int i = 0; i < num_trees_; i++) {
327 CHECK(trees_[i]->IsRoot());
328 trees_[i]->SetLeaf(0, leaf_value);
// Sets a vector-valued leaf on every tree.
// Requires leaf_vector.size() == output_dimension_ and every tree to satisfy
// IsRoot(); both are enforced with CHECK macros. Calls
// SetLeafVector(0, leaf_vector) on each tree (0 is presumably the root node
// id - confirm).
// NOTE(review): the CHECK_EQ compares an int with a size_t.
332 inline void SetLeafVector(std::vector<double>& leaf_vector) {
333 CHECK_EQ(output_dimension_, leaf_vector.size());
334 for (
int i = 0; i < num_trees_; i++) {
335 CHECK(trees_[i]->IsRoot());
336 trees_[i]->SetLeafVector(0, leaf_vector);
// Fragment (enclosing signature not visible): adds the leaf count of every
// tree to `max_leaf`. The declaration of `max_leaf` is not visible here.
347 for (
int j = 0; j < num_trees_; j++) {
348 auto &tree = *trees_[j];
349 max_leaf += tree.NumLeaves();
// Writes leaf indices for the first `num_trees` trees into `output`, which
// must hold at least num_trees * n entries.
// Each tree's PredictLeafIndexInplace receives the running `max_leaf`, which
// grows by that tree's leaf count after the call.
// NOTE(review): the declarations of `offset` and `max_leaf`, and any update to
// `offset`, are not visible in this listing.
// NOTE(review): the loop is bounded by the `num_trees` parameter, with no
// visible check against trees_.size().
392 void PredictLeafIndicesInplace(Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>& covariates, std::vector<int32_t>& output,
int num_trees, data_size_t n) {
393 CHECK_GE(output.size(), num_trees*n);
396 for (
int j = 0; j < num_trees; j++) {
397 auto &tree = *trees_[j];
398 int num_leaves = tree.NumLeaves();
399 tree.PredictLeafIndexInplace(covariates, output, offset, max_leaf);
401 max_leaf += num_leaves;
// Tail of a signature whose first line is not visible in this listing.
// This variant writes into an Eigen int matrix map and passes `column_ind`
// through to each tree. The loop follows the same pattern as the
// PredictLeafIndicesInplace overload for std::vector output: the running
// `max_leaf` grows by each tree's leaf count after that tree is processed.
// NOTE(review): the declarations of `offset` and `max_leaf` are not visible.
424 Eigen::Map<Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>>& output,
425 int column_ind,
int num_trees, data_size_t n) {
426 CHECK_GE(output.size(), num_trees*n);
429 for (
int j = 0; j < num_trees; j++) {
430 auto &tree = *trees_[j];
431 int num_leaves = tree.NumLeaves();
432 tree.PredictLeafIndexInplace(covariates, output, column_ind, offset, max_leaf);
434 max_leaf += num_leaves;
// Body fragment of another leaf-index routine (signature not visible).
// Requires `output` to hold at least num_trees * n entries, then has each of
// the first `num_trees` trees write its indices. The running `max_leaf` grows
// by each tree's leaf count after that tree is processed.
// NOTE(review): the declarations of `offset` and `max_leaf` are not visible.
456 CHECK_GE(output.size(), num_trees*n);
459 for (
int j = 0; j < num_trees; j++) {
460 auto &tree = *trees_[j];
461 int num_leaves = tree.NumLeaves();
462 tree.PredictLeafIndexInplace(covariates, output, offset, max_leaf);
464 max_leaf += num_leaves;
// Fragment (signature not visible): takes the tree count from num_trees_ and
// allocates an int32 buffer of n * num_trees entries. Where `n` comes from is
// not visible here.
473 int num_trees = num_trees_;
475 std::vector<int32_t> output(n*num_trees);
// Serialization fragment (signature and the declaration of `result_obj` are
// not visible in this listing).
// Stores the four configuration fields under fixed keys, then stores each
// tree's to_json() under the key "tree_<i>".
483 result_obj.emplace(
"num_trees", this->num_trees_);
484 result_obj.emplace(
"output_dimension", this->output_dimension_);
485 result_obj.emplace(
"is_leaf_constant", this->is_leaf_constant_);
486 result_obj.emplace(
"is_exponentiated", this->is_exponentiated_);
488 std::string tree_label;
// NOTE(review): `int i` is compared against trees_.size() (size_t).
489 for (
int i = 0; i < trees_.size(); i++) {
490 tree_label =
"tree_" + std::to_string(i);
491 result_obj.emplace(tree_label, trees_[i]->
to_json());
// Deserialization fragment (signature not visible).
// Reads the four configuration fields from `ensemble_json` using the same keys
// the serialization code in this file writes. trees_ is then resized to
// num_trees_, and each tree is rebuilt from the entry keyed "tree_<i>".
// NOTE(review): the line between the tree_label declaration and the resize is
// not visible in this listing.
499 this->num_trees_ = ensemble_json.at(
"num_trees");
500 this->output_dimension_ = ensemble_json.at(
"output_dimension");
501 this->is_leaf_constant_ = ensemble_json.at(
"is_leaf_constant");
502 this->is_exponentiated_ = ensemble_json.at(
"is_exponentiated");
504 std::string tree_label;
506 trees_.resize(this->num_trees_);
507 for (
int i = 0; i < this->num_trees_; i++) {
508 tree_label =
"tree_" + std::to_string(i);
// Each slot gets a newly constructed Tree before it is loaded from JSON.
509 trees_[i] = std::make_unique<Tree>();
510 trees_[i]->from_json(ensemble_json.at(tree_label));
// Ensemble state. The declaration of num_trees_ is not visible in this listing.
// Owning pointers to the individual trees.
515 std::vector<std::unique_ptr<Tree>> trees_;
// Number of values per leaf; checked against Tree::OutputDimension() and
// basis.cols() in the prediction code.
517 int output_dimension_;
// When true, PredictInplace uses the covariates-only path (no basis).
518 bool is_leaf_constant_;
// When true, PredictInplace stores exp(pred) instead of pred.
519 bool is_exponentiated_;