25#ifndef STOCHTREE_PARTITION_TRACKER_H_
26#define STOCHTREE_PARTITION_TRACKER_H_
28#include <stochtree/data.h>
29#include <stochtree/ensemble.h>
30#include <stochtree/log.h>
31#include <stochtree/tree.h>
43class SampleNodeMapper;
44class SamplePredMapper;
45class UnsortedNodeSampleTracker;
46class SortedNodeSampleTracker;
47class FeaturePresortRootContainer;
/*!
 * \brief Construct a new ForestTracker object.
 *
 * \param covariates Covariate matrix (passed by non-const reference).
 * \param feature_types Per-feature type tags (passed by non-const reference).
 * \param num_trees Number of trees to track.
 * \param num_observations Number of observations to track.
 */
60 ForestTracker(Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types,
int num_trees,
int num_observations);
63 void AssignAllSamplesToRoot();
64 void AssignAllSamplesToRoot(int32_t tree_num);
65 void AssignAllSamplesToConstantPrediction(
double value);
66 void AssignAllSamplesToConstantPrediction(int32_t tree_num,
double value);
70 void ResetRoot(Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types, int32_t tree_num);
71 void AddSplit(Eigen::MatrixXd& covariates,
TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id,
bool keep_sorted =
false);
72 void RemoveSplit(Eigen::MatrixXd& covariates,
Tree* tree, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id,
bool keep_sorted =
false);
73 double GetSamplePrediction(data_size_t sample_id);
74 double GetTreeSamplePrediction(data_size_t sample_id,
int tree_id);
75 void UpdateVarWeightsFromInternalPredictions(
ForestDataset& dataset);
76 void SetSamplePrediction(data_size_t sample_id,
double value);
77 void SetTreeSamplePrediction(data_size_t sample_id,
int tree_id,
double value);
78 void SyncPredictions();
79 data_size_t GetNodeId(
int observation_num,
int tree_num);
80 data_size_t UnsortedNodeBegin(
int tree_id,
int node_id);
81 data_size_t UnsortedNodeEnd(
int tree_id,
int node_id);
82 data_size_t UnsortedNodeSize(
int tree_id,
int node_id);
83 data_size_t SortedNodeBegin(
int node_id,
int feature_id);
84 data_size_t SortedNodeEnd(
int node_id,
int feature_id);
85 data_size_t SortedNodeSize(
int node_id,
int feature_id);
86 std::vector<data_size_t>::iterator UnsortedNodeBeginIterator(
int tree_id,
int node_id);
87 std::vector<data_size_t>::iterator UnsortedNodeEndIterator(
int tree_id,
int node_id);
88 std::vector<data_size_t>::iterator SortedNodeBeginIterator(
int node_id,
int feature_id);
89 std::vector<data_size_t>::iterator SortedNodeEndIterator(
int node_id,
int feature_id);
/*! \brief Raw, non-owning pointer to the SampleNodeMapper held in sample_node_mapper_ (a std::unique_ptr member, which keeps ownership). */
91 SampleNodeMapper* GetSampleNodeMapper() {
return sample_node_mapper_.get();}
/*! \brief Raw, non-owning pointer to the UnsortedNodeSampleTracker held in unsorted_node_sample_tracker_ (a std::unique_ptr member, which keeps ownership). */
92 UnsortedNodeSampleTracker* GetUnsortedNodeSampleTracker() {
return unsorted_node_sample_tracker_.get();}
/*! \brief Raw, non-owning pointer to the SortedNodeSampleTracker held in sorted_node_sample_tracker_ (a std::unique_ptr member, which keeps ownership). */
93 SortedNodeSampleTracker* GetSortedNodeSampleTracker() {
return sorted_node_sample_tracker_.get();}
/*! \brief Returns the stored observation count (num_observations_). */
94 int GetNumObservations() {
return num_observations_;}
/*! \brief Returns the stored tree count (num_trees_). */
95 int GetNumTrees() {
return num_trees_;}
/*! \brief Returns the stored feature count (num_features_). */
96 int GetNumFeatures() {
return num_features_;}
/*! \brief Returns the initialized_ flag; the member's in-class default is false. */
97 bool Initialized() {
return initialized_;}
101 std::vector<double> sum_predictions_;
103 std::unique_ptr<SamplePredMapper> sample_pred_mapper_;
105 std::unique_ptr<SampleNodeMapper> sample_node_mapper_;
109 std::unique_ptr<UnsortedNodeSampleTracker> unsorted_node_sample_tracker_;
113 std::unique_ptr<FeaturePresortRootContainer> presort_container_;
114 std::unique_ptr<SortedNodeSampleTracker> sorted_node_sample_tracker_;
115 std::vector<FeatureType> feature_types_;
117 int num_observations_;
119 bool initialized_{
false};
121 void UpdatePredictionsInternal(TreeEnsemble* ensemble, Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis);
122 void UpdatePredictionsInternal(TreeEnsemble* ensemble, Eigen::MatrixXd& covariates);
123 void UpdateSampleTrackersInternal(TreeEnsemble& forest, Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis);
124 void UpdateSampleTrackersInternal(TreeEnsemble& forest, Eigen::MatrixXd& covariates);
125 void UpdateSampleTrackersResidualInternalBasis(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual,
bool is_mean_model);
126 void UpdateSampleTrackersResidualInternalNoBasis(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual,
bool is_mean_model);
133 num_trees_ = num_trees;
134 num_observations_ = num_observations;
136 tree_preds_.resize(num_trees_);
137 for (
int j = 0; j < num_trees_; j++) {
138 tree_preds_[j].resize(num_observations_);
142 inline double GetPred(data_size_t sample_id,
int tree_id) {
143 CHECK_LT(sample_id, num_observations_);
144 CHECK_LT(tree_id, num_trees_);
145 return tree_preds_[tree_id][sample_id];
148 inline void SetPred(data_size_t sample_id,
int tree_id,
double value) {
149 CHECK_LT(sample_id, num_observations_);
150 CHECK_LT(tree_id, num_trees_);
151 tree_preds_[tree_id][sample_id] = value;
/*! \brief Returns the stored tree count (num_trees_). */
154 inline int NumTrees() {
return num_trees_;}
/*! \brief Returns the stored observation count (num_observations_). */
// NOTE(review): num_observations_ is declared as data_size_t but the return type is int,
// so the value is implicitly converted -- confirm the two types agree.
156 inline int NumObservations() {
return num_observations_;}
158 inline void AssignAllSamplesToConstantPrediction(
int tree_id,
double value) {
159 for (data_size_t i = 0; i < num_observations_; i++) {
160 tree_preds_[tree_id][i] = value;
165 std::vector<std::vector<double>> tree_preds_;
167 data_size_t num_observations_;
174 num_trees_ = num_trees;
175 num_observations_ = num_observations;
177 tree_observation_indices_.resize(num_trees_);
178 for (
int j = 0; j < num_trees_; j++) {
179 tree_observation_indices_[j].resize(num_observations_);
184 num_trees_ = other.NumTrees();
185 num_observations_ = other.NumObservations();
187 tree_observation_indices_.resize(num_trees_);
188 for (
int j = 0; j < num_trees_; j++) {
189 tree_observation_indices_[j].resize(num_observations_);
190 for (
int i = 0; i < num_observations_; i++) {
191 tree_observation_indices_[j][i] = other.GetNodeId(i, j);
196 void AddSplit(Eigen::MatrixXd& covariates,
TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id) {
197 CHECK_EQ(num_observations_, covariates.rows());
199 for (
int i = 0; i < num_observations_; i++) {
200 if (tree_observation_indices_[tree_id][i] == split_node_id) {
201 auto fvalue = covariates(i, split_feature);
203 tree_observation_indices_[tree_id][i] = left_node_id;
205 tree_observation_indices_[tree_id][i] = right_node_id;
211 inline data_size_t GetNodeId(data_size_t sample_id,
int tree_id) {
212 CHECK_LT(sample_id, num_observations_);
213 CHECK_LT(tree_id, num_trees_);
214 return tree_observation_indices_[tree_id][sample_id];
217 inline void SetNodeId(data_size_t sample_id,
int tree_id,
int node_id) {
218 CHECK_LT(sample_id, num_observations_);
219 CHECK_LT(tree_id, num_trees_);
220 tree_observation_indices_[tree_id][sample_id] = node_id;
/*! \brief Returns the stored tree count (num_trees_). */
223 inline int NumTrees() {
return num_trees_;}
/*! \brief Returns the stored observation count (num_observations_). */
// NOTE(review): num_observations_ is declared as data_size_t but the return type is int,
// so the value is implicitly converted -- confirm the two types agree.
225 inline int NumObservations() {
return num_observations_;}
227 inline void AssignAllSamplesToRoot(
int tree_id) {
228 for (data_size_t i = 0; i < num_observations_; i++) {
229 tree_observation_indices_[tree_id][i] = 0;
234 std::vector<std::vector<int>> tree_observation_indices_;
236 data_size_t num_observations_;
/*!
 * \brief Partition a node based on a new split rule (TreeSplit overload).
 *
 * \param covariates Covariate matrix (passed by non-const reference).
 * \param node_id Id of the node being partitioned.
 * \param left_node_id Id to use for the left child.
 * \param right_node_id Id to use for the right child.
 * \param feature_split Index of the feature the rule is applied to -- presumably a column of covariates; confirm in the definition.
 * \param split Split rule object.
 */
248 void PartitionNode(Eigen::MatrixXd& covariates,
int node_id,
int left_node_id,
int right_node_id,
int feature_split,
TreeSplit& split);
/*!
 * \brief Partition a node based on a new split rule (numeric split-value overload).
 *
 * \param covariates Covariate matrix (passed by non-const reference).
 * \param node_id Id of the node being partitioned.
 * \param left_node_id Id to use for the left child.
 * \param right_node_id Id to use for the right child.
 * \param feature_split Index of the feature the rule is applied to -- presumably a column of covariates; confirm in the definition.
 * \param split_value Value defining the split rule.
 */
251 void PartitionNode(Eigen::MatrixXd& covariates,
int node_id,
int left_node_id,
int right_node_id,
int feature_split,
double split_value);
/*!
 * \brief Partition a node based on a new split rule (category-list overload).
 *
 * \param covariates Covariate matrix (passed by non-const reference).
 * \param node_id Id of the node being partitioned.
 * \param left_node_id Id to use for the left child.
 * \param right_node_id Id to use for the right child.
 * \param feature_split Index of the feature the rule is applied to -- presumably a column of covariates; confirm in the definition.
 * \param category_list Categories defining the split rule (read-only).
 */
254 void PartitionNode(Eigen::MatrixXd& covariates,
int node_id,
int left_node_id,
int right_node_id,
int feature_split, std::vector<std::uint32_t>
const& category_list);
300 std::vector<data_size_t> node_begin_;
301 std::vector<data_size_t> node_length_;
302 std::vector<int32_t> parent_nodes_;
303 std::vector<int32_t> left_nodes_;
304 std::vector<int32_t> right_nodes_;
305 int num_nodes_, num_deleted_nodes_;
306 std::vector<int> deleted_nodes_;
309 void ExpandNodeTrackingVectors(
int node_id,
int left_node_id,
int right_node_id, data_size_t node_start_idx, data_size_t num_left, data_size_t num_right);
310 void ConvertLeafParentToLeaf(
int node_id);
317 feature_partitions_.resize(num_trees);
318 num_trees_ = num_trees;
319 for (
int i = 0; i < num_trees; i++) {
328 void PartitionTreeNode(Eigen::MatrixXd& covariates,
int tree_id,
int node_id,
int left_node_id,
int right_node_id,
int feature_split,
TreeSplit& split) {
329 return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, split);
333 void PartitionTreeNode(Eigen::MatrixXd& covariates,
int tree_id,
int node_id,
int left_node_id,
int right_node_id,
int feature_split,
double split_value) {
334 return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, split_value);
338 void PartitionTreeNode(Eigen::MatrixXd& covariates,
int tree_id,
int node_id,
int left_node_id,
int right_node_id,
int feature_split, std::vector<std::uint32_t>
const& category_list) {
339 return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, category_list);
349 return feature_partitions_[tree_id]->PruneNodeToLeaf(node_id);
354 return feature_partitions_[tree_id]->IsLeaf(node_id);
359 return feature_partitions_[tree_id]->IsValidNode(node_id);
364 return feature_partitions_[tree_id]->LeftNodeIsLeaf(node_id);
369 return feature_partitions_[tree_id]->RightNodeIsLeaf(node_id);
374 return feature_partitions_[tree_id]->NodeBegin(node_id);
378 data_size_t
NodeEnd(
int tree_id,
int node_id) {
379 return feature_partitions_[tree_id]->NodeEnd(node_id);
382 std::vector<data_size_t>::iterator NodeBeginIterator(
int tree_id,
int node_id) {
383 data_size_t node_begin = feature_partitions_[tree_id]->NodeBegin(node_id);
384 auto begin_iter = feature_partitions_[tree_id]->indices_.begin();
385 return begin_iter + node_begin;
/*!
 * \brief Iterator into the indices_ vector of tree tree_id's partition, positioned NodeEnd(node_id) elements past begin().
 *
 * \param tree_id Index into feature_partitions_.
 * \param node_id Node whose end position is requested.
 */
388 std::vector<data_size_t>::iterator NodeEndIterator(
int tree_id,
int node_id) {
// NOTE(review): the end offset is stored in an int here, whereas NodeBeginIterator keeps
// its offset in a data_size_t -- confirm the types agree or align this with the sibling.
389 int node_end = feature_partitions_[tree_id]->NodeEnd(node_id);
390 auto begin_iter = feature_partitions_[tree_id]->indices_.begin();
391 return begin_iter + node_end;
396 return feature_partitions_[tree_id]->NodeSize(node_id);
401 return feature_partitions_[tree_id]->Parent(node_id);
406 return feature_partitions_[tree_id]->LeftNode(node_id);
411 return feature_partitions_[tree_id]->RightNode(node_id);
416 return feature_partitions_[tree_id]->NodeIndices(node_id);
421 feature_partitions_[tree_id]->UpdateObservationMapping(node_id, tree_id, sample_node_mapper);
426 std::vector<int> leaves = tree->
GetLeaves();
428 for (
int i = 0; i < leaves.size(); i++) {
442 std::vector<std::unique_ptr<FeatureUnsortedPartition>> feature_partitions_;
449 NodeOffsetSize(data_size_t node_offset, data_size_t node_size) : node_begin_{node_offset}, node_size_{node_size}, presorted_{
false} {
450 node_end_ = node_begin_ + node_size_;
/*! \brief Sets the presorted_ flag to true. */
455 void SetSorted() {presorted_ =
true;}
/*! \brief Returns the presorted_ flag. */
457 bool IsSorted() {
return presorted_;}
/*! \brief Returns the stored begin offset (node_begin_). */
459 data_size_t Begin() {
return node_begin_;}
/*! \brief Returns the stored end offset (node_end_), which the constructor sets to node_begin_ + node_size_. */
461 data_size_t End() {
return node_end_;}
/*! \brief Returns the stored node size (node_size_). */
463 data_size_t Size() {
return node_size_;}
466 data_size_t node_begin_;
467 data_size_t node_size_;
468 data_size_t node_end_;
487 FeaturePresortRoot(Eigen::MatrixXd& covariates, int32_t feature_index, FeatureType feature_type) {
488 feature_index_ = feature_index;
489 ArgsortRoot(covariates);
494 void ArgsortRoot(Eigen::MatrixXd& covariates) {
495 data_size_t num_obs = covariates.rows();
498 if (feature_sort_indices_.size() != num_obs){
499 feature_sort_indices_.resize(num_obs, 0);
501 std::iota(feature_sort_indices_.begin(), feature_sort_indices_.end(), 0);
506 auto comp_op = [&](
size_t const &l,
size_t const &r) {
return std::less<double>{}(covariates(l, feature_index_), covariates(r, feature_index_)); };
507 std::stable_sort(feature_sort_indices_.begin(), feature_sort_indices_.end(), comp_op);
511 std::vector<data_size_t> feature_sort_indices_;
512 int32_t feature_index_;
519 num_features_ = covariates.cols();
520 feature_presort_.resize(num_features_);
521 for (
int i = 0; i < num_features_; i++) {
/*!
 * \brief Raw, non-owning pointer to the FeaturePresortRoot stored at position feature_num of feature_presort_.
 *
 * \param feature_num Index into feature_presort_; no bounds check is performed here.
 */
528 FeaturePresortRoot* GetFeaturePresort(
int feature_num) {
return feature_presort_[feature_num].get(); }
531 std::vector<std::unique_ptr<FeaturePresortRoot>> feature_presort_;
549 feature_index_ = feature_index;
550 feature_type_ = feature_type;
551 num_obs_ = covariates.rows();
555 data_size_t node_offset = 0;
556 node_offset_sizes_.emplace_back(node_offset, num_obs_);
565 void SplitFeatureNumeric(Eigen::MatrixXd& covariates, int32_t node_id, int32_t feature_index,
double split_value);
568 void SplitFeatureCategorical(Eigen::MatrixXd& covariates, int32_t node_id, int32_t feature_index, std::vector<std::uint32_t>
const& category_list);
/*!
 * \brief Start position of node indexed by node_id.
 *
 * Forwards to Begin() on node_offset_sizes_[node_id]; node_id is not range-checked here.
 */
571 data_size_t
NodeBegin(int32_t node_id) {
return node_offset_sizes_[node_id].Begin();}
/*!
 * \brief End position of node indexed by node_id.
 *
 * Forwards to End() on node_offset_sizes_[node_id]; node_id is not range-checked here.
 */
574 data_size_t
NodeEnd(int32_t node_id) {
return node_offset_sizes_[node_id].End();}
/*!
 * \brief Size (in observations) of node indexed by node_id.
 *
 * Forwards to Size() on node_offset_sizes_[node_id]; node_id is not range-checked here.
 */
577 data_size_t
NodeSize(int32_t node_id) {
return node_offset_sizes_[node_id].Size();}
595 void AddLeftRightNodes(data_size_t left_node_begin, data_size_t left_node_size, data_size_t right_node_begin, data_size_t right_node_size);
598 std::vector<NodeOffsetSize> node_offset_sizes_;
599 int32_t feature_index_;
600 FeatureType feature_type_;
601 data_size_t num_obs_;
608 num_features_ = covariates.cols();
609 feature_partitions_.resize(num_features_);
611 for (
int i = 0; i < num_features_; i++) {
612 feature_presort_root = feature_presort_root_container->GetFeaturePresort(i);
613 feature_partitions_[i].reset(
new FeaturePresortPartition(feature_presort_root, covariates, i, feature_types[i]));
619 for (
int i = 0; i < num_features_; i++) {
620 feature_partitions_[i]->SplitFeature(covariates, node_id, feature_split, split);
625 void PartitionNode(Eigen::MatrixXd& covariates,
int node_id,
int feature_split,
double split_value) {
626 for (
int i = 0; i < num_features_; i++) {
627 feature_partitions_[i]->SplitFeatureNumeric(covariates, node_id, feature_split, split_value);
632 void PartitionNode(Eigen::MatrixXd& covariates,
int node_id,
int feature_split, std::vector<std::uint32_t>
const& category_list) {
633 for (
int i = 0; i < num_features_; i++) {
634 feature_partitions_[i]->SplitFeatureCategorical(covariates, node_id, feature_split, category_list);
640 return feature_partitions_[feature_index]->NodeBegin(node_id);
644 data_size_t
NodeEnd(
int node_id,
int feature_index) {
645 return feature_partitions_[feature_index]->NodeEnd(node_id);
649 data_size_t
NodeSize(
int node_id,
int feature_index) {
650 return feature_partitions_[feature_index]->NodeSize(node_id);
653 std::vector<data_size_t>::iterator NodeBeginIterator(
int node_id,
int feature_index) {
654 data_size_t node_begin =
NodeBegin(node_id, feature_index);
655 auto begin_iter = feature_partitions_[feature_index]->feature_sort_indices_.begin();
656 return begin_iter + node_begin;
659 std::vector<data_size_t>::iterator NodeEndIterator(
int node_id,
int feature_index) {
660 data_size_t node_end =
NodeEnd(node_id, feature_index);
661 auto begin_iter = feature_partitions_[feature_index]->feature_sort_indices_.begin();
662 return begin_iter + node_end;
666 std::vector<data_size_t>
NodeIndices(
int node_id,
int feature_index) {
667 return feature_partitions_[feature_index]->NodeIndices(node_id);
/*!
 * \brief Feature sort index j for feature_index.
 *
 * Forwards to SortIndex(j) on feature_partitions_[feature_index]; feature_index is not range-checked here.
 */
671 data_size_t
SortIndex(data_size_t j,
int feature_index) {
return feature_partitions_[feature_index]->SortIndex(j); }
675 feature_partitions_[feature_index]->UpdateObservationMapping(node_id, tree_id, sample_node_mapper);
679 std::vector<std::unique_ptr<FeaturePresortPartition>> feature_partitions_;
Internal wrapper around Eigen::VectorXd interface for univariate floating point data....
Definition data.h:194
Data structure that tracks pre-sorted feature values through a tree's split lifecycle.
Definition partition_tracker.h:545
void SplitFeature(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, TreeSplit &split)
Split numeric / ordered categorical feature and update sort indices.
data_size_t NodeEnd(int32_t node_id)
End position of node indexed by node_id.
Definition partition_tracker.h:574
std::vector< data_size_t > NodeIndices(int node_id)
Data indices for a given node.
std::vector< data_size_t > feature_sort_indices_
Feature sort indices.
Definition partition_tracker.h:592
data_size_t NodeSize(int32_t node_id)
Size (in observations) of node indexed by node_id.
Definition partition_tracker.h:577
FeatureType GetFeatureType()
Feature type.
Definition partition_tracker.h:586
data_size_t SortIndex(data_size_t j)
Feature sort index j.
Definition partition_tracker.h:583
data_size_t NodeBegin(int32_t node_id)
Start position of node indexed by node_id.
Definition partition_tracker.h:571
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
void SplitFeatureNumeric(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, double split_value)
Split numeric / ordered categorical feature and update sort indices.
void SplitFeatureCategorical(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, std::vector< std::uint32_t > const &category_list)
Split unordered categorical feature and update sort indices.
Container class for FeaturePresortRoot objects stored for every feature in a dataset.
Definition partition_tracker.h:516
Data structure for presorting a feature by its values.
Definition partition_tracker.h:484
Mapping nodes to the indices they contain.
Definition partition_tracker.h:240
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
int RightNode(int node_id)
Right child of node_id.
int Parent(int node_id)
Parent node_id.
data_size_t NodeEnd(int node_id)
One past the last index of data points contained in node_id.
std::vector< data_size_t > indices_
Data indices.
Definition partition_tracker.h:290
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
bool RightNodeIsLeaf(int node_id)
Whether node_id's right child is a leaf.
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
data_size_t NodeSize(int node_id)
Number of data points contained in node_id.
std::vector< data_size_t > NodeIndices(int node_id)
Data indices for a given node.
void ReconstituteFromTree(Tree &tree, ForestDataset &dataset)
Reconstitute a tree partition tracker from root based on a tree.
bool IsLeaf(int node_id)
Whether node_id is a leaf.
void PruneNodeToLeaf(int node_id)
Convert a (currently split) node to a leaf.
bool LeftNodeIsLeaf(int node_id)
Whether node_id's left child is a leaf.
int LeftNode(int node_id)
Left child of node_id.
bool IsValidNode(int node_id)
Whether node_id is a valid node.
data_size_t NodeBegin(int node_id)
First index of data points contained in node_id.
API for loading and accessing data used to sample tree ensembles The covariates / bases / weights use...
Definition data.h:272
"Superclass" wrapper around tracking data structures for forest sampling algorithms
Definition partition_tracker.h:50
ForestTracker(Eigen::MatrixXd &covariates, std::vector< FeatureType > &feature_types, int num_trees, int num_observations)
Construct a new ForestTracker object.
Tracking cutpoints available at a given node.
Definition partition_tracker.h:447
Class storing sample-node map for each tree in an ensemble.
Definition partition_tracker.h:171
Class storing sample-prediction map for each tree in an ensemble.
Definition partition_tracker.h:130
Data structure for tracking observations through a tree partition with each feature pre-sorted.
Definition partition_tracker.h:605
data_size_t NodeSize(int node_id, int feature_index)
Number of data points contained in node_id, as tracked for feature_index.
Definition partition_tracker.h:649
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper, int feature_index=0)
Update SampleNodeMapper for all the observations in node_id.
Definition partition_tracker.h:674
std::vector< data_size_t > NodeIndices(int node_id, int feature_index)
Data indices for a given node.
Definition partition_tracker.h:666
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
Definition partition_tracker.h:625
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
Definition partition_tracker.h:618
data_size_t SortIndex(data_size_t j, int feature_index)
Feature sort index j for feature_index.
Definition partition_tracker.h:671
data_size_t NodeBegin(int node_id, int feature_index)
First index of data points contained in node_id.
Definition partition_tracker.h:639
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
Definition partition_tracker.h:632
data_size_t NodeEnd(int node_id, int feature_index)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:644
Class storing a "forest," or an ensemble of decision trees.
Definition ensemble.h:37
Representation of arbitrary tree split rules, including numeric split rules (X[,i] <= c) and categori...
Definition tree.h:961
bool SplitTrue(double fvalue)
Whether a given covariate value is True or False on the rule defined by a TreeSplit object.
Definition tree.h:993
Decision tree data structure.
Definition tree.h:69
std::vector< std::int32_t > const & GetLeaves() const
Get indices of all leaf nodes.
Definition tree.h:567
Mapping nodes to the indices they contain.
Definition partition_tracker.h:314
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
Definition partition_tracker.h:338
void UpdateObservationMapping(Tree *tree, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in tree.
Definition partition_tracker.h:425
int RightNode(int tree_id, int node_id)
Right child of node_id.
Definition partition_tracker.h:410
bool IsLeaf(int tree_id, int node_id)
Whether node_id is a leaf.
Definition partition_tracker.h:353
bool RightNodeIsLeaf(int tree_id, int node_id)
Whether node_id's right child is a leaf.
Definition partition_tracker.h:368
bool IsValidNode(int tree_id, int node_id)
Whether node_id is a valid node.
Definition partition_tracker.h:358
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
Definition partition_tracker.h:420
data_size_t NodeBegin(int tree_id, int node_id)
First index of data points contained in node_id.
Definition partition_tracker.h:373
int NumTrees()
Number of trees.
Definition partition_tracker.h:435
int LeftNode(int tree_id, int node_id)
Left child of node_id.
Definition partition_tracker.h:405
void ResetTreeToRoot(int tree_id, data_size_t n)
Convert a tree to root.
Definition partition_tracker.h:343
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
Definition partition_tracker.h:328
data_size_t NodeSize(int tree_id, int node_id)
Number of data points contained in node_id of tree tree_id.
Definition partition_tracker.h:395
bool LeftNodeIsLeaf(int tree_id, int node_id)
Whether node_id's left child is a leaf.
Definition partition_tracker.h:363
int Parent(int tree_id, int node_id)
Parent node_id.
Definition partition_tracker.h:400
void ReconstituteFromForest(TreeEnsemble &forest, ForestDataset &dataset)
Reconstruct the node sample tracker based on the splits in a forest.
data_size_t NodeEnd(int tree_id, int node_id)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:378
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
Definition partition_tracker.h:333
FeatureUnsortedPartition * GetFeaturePartition(int i)
Pointer to the FeatureUnsortedPartition stored at index i.
Definition partition_tracker.h:438
std::vector< data_size_t > TreeNodeIndices(int tree_id, int node_id)
Data indices for a given node.
Definition partition_tracker.h:415
void PruneTreeNodeToLeaf(int tree_id, int node_id)
Convert a (currently split) node to a leaf.
Definition partition_tracker.h:348
Definition category_tracker.h:40