StochTree 0.0.1
Loading...
Searching...
No Matches
partition_tracker.h
1
25#ifndef STOCHTREE_PARTITION_TRACKER_H_
26#define STOCHTREE_PARTITION_TRACKER_H_
27
28#include <stochtree/data.h>
29#include <stochtree/ensemble.h>
30#include <stochtree/log.h>
31#include <stochtree/tree.h>
32
33#include <cmath>
34#include <numeric>
35#include <random>
36#include <set>
37#include <string>
38#include <vector>
39
40namespace StochTree {
41
43class SampleNodeMapper;
44class SamplePredMapper;
45class UnsortedNodeSampleTracker;
46class SortedNodeSampleTracker;
47class FeaturePresortRootContainer;
48
51 public:
60 ForestTracker(Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types, int num_trees, int num_observations);
62 void ReconstituteFromForest(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual, bool is_mean_model);
63 void AssignAllSamplesToRoot();
64 void AssignAllSamplesToRoot(int32_t tree_num);
65 void AssignAllSamplesToConstantPrediction(double value);
66 void AssignAllSamplesToConstantPrediction(int32_t tree_num, double value);
67 void UpdatePredictions(TreeEnsemble* ensemble, ForestDataset& dataset);
68 void UpdateSampleTrackers(TreeEnsemble& forest, ForestDataset& dataset);
69 void UpdateSampleTrackersResidual(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual, bool is_mean_model);
70 void ResetRoot(Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types, int32_t tree_num);
71 void AddSplit(Eigen::MatrixXd& covariates, TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, bool keep_sorted = false);
72 void RemoveSplit(Eigen::MatrixXd& covariates, Tree* tree, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, bool keep_sorted = false);
73 double GetSamplePrediction(data_size_t sample_id);
74 double GetTreeSamplePrediction(data_size_t sample_id, int tree_id);
75 void UpdateVarWeightsFromInternalPredictions(ForestDataset& dataset);
76 void SetSamplePrediction(data_size_t sample_id, double value);
77 void SetTreeSamplePrediction(data_size_t sample_id, int tree_id, double value);
78 void SyncPredictions();
79 data_size_t GetNodeId(int observation_num, int tree_num);
80 data_size_t UnsortedNodeBegin(int tree_id, int node_id);
81 data_size_t UnsortedNodeEnd(int tree_id, int node_id);
82 data_size_t UnsortedNodeSize(int tree_id, int node_id);
83 data_size_t SortedNodeBegin(int node_id, int feature_id);
84 data_size_t SortedNodeEnd(int node_id, int feature_id);
85 data_size_t SortedNodeSize(int node_id, int feature_id);
86 std::vector<data_size_t>::iterator UnsortedNodeBeginIterator(int tree_id, int node_id);
87 std::vector<data_size_t>::iterator UnsortedNodeEndIterator(int tree_id, int node_id);
88 std::vector<data_size_t>::iterator SortedNodeBeginIterator(int node_id, int feature_id);
89 std::vector<data_size_t>::iterator SortedNodeEndIterator(int node_id, int feature_id);
90 SamplePredMapper* GetSamplePredMapper() {return sample_pred_mapper_.get();}
91 SampleNodeMapper* GetSampleNodeMapper() {return sample_node_mapper_.get();}
92 UnsortedNodeSampleTracker* GetUnsortedNodeSampleTracker() {return unsorted_node_sample_tracker_.get();}
93 SortedNodeSampleTracker* GetSortedNodeSampleTracker() {return sorted_node_sample_tracker_.get();}
94
95 private:
97 std::vector<double> sum_predictions_;
99 std::unique_ptr<SamplePredMapper> sample_pred_mapper_;
101 std::unique_ptr<SampleNodeMapper> sample_node_mapper_;
105 std::unique_ptr<UnsortedNodeSampleTracker> unsorted_node_sample_tracker_;
109 std::unique_ptr<FeaturePresortRootContainer> presort_container_;
110 std::unique_ptr<SortedNodeSampleTracker> sorted_node_sample_tracker_;
111 std::vector<FeatureType> feature_types_;
112 int num_trees_;
113 int num_observations_;
114 int num_features_;
115 bool initialized_{false};
116
117 void UpdatePredictionsInternal(TreeEnsemble* ensemble, Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis);
118 void UpdatePredictionsInternal(TreeEnsemble* ensemble, Eigen::MatrixXd& covariates);
119 void UpdateSampleTrackersInternal(TreeEnsemble& forest, Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis);
120 void UpdateSampleTrackersInternal(TreeEnsemble& forest, Eigen::MatrixXd& covariates);
121 void UpdateSampleTrackersResidualInternalBasis(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual, bool is_mean_model);
122 void UpdateSampleTrackersResidualInternalNoBasis(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual, bool is_mean_model);
123};
124
127 public:
128 SamplePredMapper(int num_trees, data_size_t num_observations) {
129 num_trees_ = num_trees;
130 num_observations_ = num_observations;
131 // Initialize the vector of vectors of leaf indices for each tree
132 tree_preds_.resize(num_trees_);
133 for (int j = 0; j < num_trees_; j++) {
134 tree_preds_[j].resize(num_observations_);
135 }
136 }
137
138 inline double GetPred(data_size_t sample_id, int tree_id) {
139 CHECK_LT(sample_id, num_observations_);
140 CHECK_LT(tree_id, num_trees_);
141 return tree_preds_[tree_id][sample_id];
142 }
143
144 inline void SetPred(data_size_t sample_id, int tree_id, double value) {
145 CHECK_LT(sample_id, num_observations_);
146 CHECK_LT(tree_id, num_trees_);
147 tree_preds_[tree_id][sample_id] = value;
148 }
149
150 inline int NumTrees() {return num_trees_;}
151
152 inline int NumObservations() {return num_observations_;}
153
154 inline void AssignAllSamplesToConstantPrediction(int tree_id, double value) {
155 for (data_size_t i = 0; i < num_observations_; i++) {
156 tree_preds_[tree_id][i] = value;
157 }
158 }
159
160 private:
161 std::vector<std::vector<double>> tree_preds_;
162 int num_trees_;
163 data_size_t num_observations_;
164};
165
168 public:
169 SampleNodeMapper(int num_trees, data_size_t num_observations) {
170 num_trees_ = num_trees;
171 num_observations_ = num_observations;
172 // Initialize the vector of vectors of leaf indices for each tree
173 tree_observation_indices_.resize(num_trees_);
174 for (int j = 0; j < num_trees_; j++) {
175 tree_observation_indices_[j].resize(num_observations_);
176 }
177 }
178
180 num_trees_ = other.NumTrees();
181 num_observations_ = other.NumObservations();
182 // Initialize the vector of vectors of leaf indices for each tree
183 tree_observation_indices_.resize(num_trees_);
184 for (int j = 0; j < num_trees_; j++) {
185 tree_observation_indices_[j].resize(num_observations_);
186 for (int i = 0; i < num_observations_; i++) {
187 tree_observation_indices_[j][i] = other.GetNodeId(i, j);
188 }
189 }
190 }
191
192 void AddSplit(Eigen::MatrixXd& covariates, TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id) {
193 CHECK_EQ(num_observations_, covariates.rows());
194 // Eigen::MatrixXd X = covariates.GetData();
195 for (int i = 0; i < num_observations_; i++) {
196 if (tree_observation_indices_[tree_id][i] == split_node_id) {
197 auto fvalue = covariates(i, split_feature);
198 if (split.SplitTrue(fvalue)) {
199 tree_observation_indices_[tree_id][i] = left_node_id;
200 } else {
201 tree_observation_indices_[tree_id][i] = right_node_id;
202 }
203 }
204 }
205 }
206
207 inline data_size_t GetNodeId(data_size_t sample_id, int tree_id) {
208 CHECK_LT(sample_id, num_observations_);
209 CHECK_LT(tree_id, num_trees_);
210 return tree_observation_indices_[tree_id][sample_id];
211 }
212
213 inline void SetNodeId(data_size_t sample_id, int tree_id, int node_id) {
214 CHECK_LT(sample_id, num_observations_);
215 CHECK_LT(tree_id, num_trees_);
216 tree_observation_indices_[tree_id][sample_id] = node_id;
217 }
218
219 inline int NumTrees() {return num_trees_;}
220
221 inline int NumObservations() {return num_observations_;}
222
223 inline void AssignAllSamplesToRoot(int tree_id) {
224 for (data_size_t i = 0; i < num_observations_; i++) {
225 tree_observation_indices_[tree_id][i] = 0;
226 }
227 }
228
229 private:
230 std::vector<std::vector<int>> tree_observation_indices_;
231 int num_trees_;
232 data_size_t num_observations_;
233};
234
237 public:
238 FeatureUnsortedPartition(data_size_t n);
239
242
244 void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit& split);
245
247 void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value);
248
250 void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector<std::uint32_t> const& category_list);
251
253 void PruneNodeToLeaf(int node_id);
254
256 bool IsLeaf(int node_id);
257
259 bool IsValidNode(int node_id);
260
262 bool LeftNodeIsLeaf(int node_id);
263
265 bool RightNodeIsLeaf(int node_id);
266
268 data_size_t NodeBegin(int node_id);
269
271 data_size_t NodeEnd(int node_id);
272
274 data_size_t NodeSize(int node_id);
275
277 int Parent(int node_id);
278
280 int LeftNode(int node_id);
281
283 int RightNode(int node_id);
284
286 std::vector<data_size_t> indices_;
287
289 std::vector<data_size_t> NodeIndices(int node_id);
290
292 void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper* sample_node_mapper);
293
294 private:
295 // Vectors tracking indices in each node
296 std::vector<data_size_t> node_begin_;
297 std::vector<data_size_t> node_length_;
298 std::vector<int32_t> parent_nodes_;
299 std::vector<int32_t> left_nodes_;
300 std::vector<int32_t> right_nodes_;
301 int num_nodes_, num_deleted_nodes_;
302 std::vector<int> deleted_nodes_;
303
304 // Private helper functions
305 void ExpandNodeTrackingVectors(int node_id, int left_node_id, int right_node_id, data_size_t node_start_idx, data_size_t num_left, data_size_t num_right);
306 void ConvertLeafParentToLeaf(int node_id);
307};
308
311 public:
312 UnsortedNodeSampleTracker(data_size_t n, int num_trees) {
313 feature_partitions_.resize(num_trees);
314 num_trees_ = num_trees;
315 for (int i = 0; i < num_trees; i++) {
316 feature_partitions_[i].reset(new FeatureUnsortedPartition(n));
317 }
318 }
319
322
324 void PartitionTreeNode(Eigen::MatrixXd& covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit& split) {
325 return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, split);
326 }
327
329 void PartitionTreeNode(Eigen::MatrixXd& covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value) {
330 return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, split_value);
331 }
332
334 void PartitionTreeNode(Eigen::MatrixXd& covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector<std::uint32_t> const& category_list) {
335 return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, category_list);
336 }
337
339 void ResetTreeToRoot(int tree_id, data_size_t n) {
340 feature_partitions_[tree_id].reset(new FeatureUnsortedPartition(n));;
341 }
342
344 void PruneTreeNodeToLeaf(int tree_id, int node_id) {
345 return feature_partitions_[tree_id]->PruneNodeToLeaf(node_id);
346 }
347
349 bool IsLeaf(int tree_id, int node_id) {
350 return feature_partitions_[tree_id]->IsLeaf(node_id);
351 }
352
354 bool IsValidNode(int tree_id, int node_id) {
355 return feature_partitions_[tree_id]->IsValidNode(node_id);
356 }
357
359 bool LeftNodeIsLeaf(int tree_id, int node_id) {
360 return feature_partitions_[tree_id]->LeftNodeIsLeaf(node_id);
361 }
362
364 bool RightNodeIsLeaf(int tree_id, int node_id) {
365 return feature_partitions_[tree_id]->RightNodeIsLeaf(node_id);
366 }
367
369 data_size_t NodeBegin(int tree_id, int node_id) {
370 return feature_partitions_[tree_id]->NodeBegin(node_id);
371 }
372
374 data_size_t NodeEnd(int tree_id, int node_id) {
375 return feature_partitions_[tree_id]->NodeEnd(node_id);
376 }
377
378 std::vector<data_size_t>::iterator NodeBeginIterator(int tree_id, int node_id) {
379 data_size_t node_begin = feature_partitions_[tree_id]->NodeBegin(node_id);
380 auto begin_iter = feature_partitions_[tree_id]->indices_.begin();
381 return begin_iter + node_begin;
382 }
383
384 std::vector<data_size_t>::iterator NodeEndIterator(int tree_id, int node_id) {
385 int node_end = feature_partitions_[tree_id]->NodeEnd(node_id);
386 auto begin_iter = feature_partitions_[tree_id]->indices_.begin();
387 return begin_iter + node_end;
388 }
389
391 data_size_t NodeSize(int tree_id, int node_id) {
392 return feature_partitions_[tree_id]->NodeSize(node_id);
393 }
394
396 int Parent(int tree_id, int node_id) {
397 return feature_partitions_[tree_id]->Parent(node_id);
398 }
399
401 int LeftNode(int tree_id, int node_id) {
402 return feature_partitions_[tree_id]->LeftNode(node_id);
403 }
404
406 int RightNode(int tree_id, int node_id) {
407 return feature_partitions_[tree_id]->RightNode(node_id);
408 }
409
411 std::vector<data_size_t> TreeNodeIndices(int tree_id, int node_id) {
412 return feature_partitions_[tree_id]->NodeIndices(node_id);
413 }
414
416 void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper* sample_node_mapper) {
417 feature_partitions_[tree_id]->UpdateObservationMapping(node_id, tree_id, sample_node_mapper);
418 }
419
421 void UpdateObservationMapping(Tree* tree, int tree_id, SampleNodeMapper* sample_node_mapper) {
422 std::vector<int> leaves = tree->GetLeaves();
423 int leaf;
424 for (int i = 0; i < leaves.size(); i++) {
425 leaf = leaves[i];
426 UpdateObservationMapping(leaf, tree_id, sample_node_mapper);
427 }
428 }
429
431 int NumTrees() { return num_trees_; }
432
434 FeatureUnsortedPartition* GetFeaturePartition(int i) { return feature_partitions_[i].get(); }
435
436 private:
437 // Vectors of feature partitions
438 std::vector<std::unique_ptr<FeatureUnsortedPartition>> feature_partitions_;
439 int num_trees_;
440};
441
444 public:
445 NodeOffsetSize(data_size_t node_offset, data_size_t node_size) : node_begin_{node_offset}, node_size_{node_size}, presorted_{false} {
446 node_end_ = node_begin_ + node_size_;
447 }
448
449 ~NodeOffsetSize() {}
450
451 void SetSorted() {presorted_ = true;}
452
453 bool IsSorted() {return presorted_;}
454
455 data_size_t Begin() {return node_begin_;}
456
457 data_size_t End() {return node_end_;}
458
459 data_size_t Size() {return node_size_;}
460
461 private:
462 data_size_t node_begin_;
463 data_size_t node_size_;
464 data_size_t node_end_;
465 bool presorted_;
466};
467
470
482 public:
483 FeaturePresortRoot(Eigen::MatrixXd& covariates, int32_t feature_index, FeatureType feature_type) {
484 feature_index_ = feature_index;
485 ArgsortRoot(covariates);
486 }
487
489
490 void ArgsortRoot(Eigen::MatrixXd& covariates) {
491 data_size_t num_obs = covariates.rows();
492
493 // Make a vector of indices from 0 to num_obs - 1
494 if (feature_sort_indices_.size() != num_obs){
495 feature_sort_indices_.resize(num_obs, 0);
496 }
497 std::iota(feature_sort_indices_.begin(), feature_sort_indices_.end(), 0);
498
499 // Define a custom comparator to be used with stable_sort:
500 // For every two indices l and r store as elements of `data_sort_indices_`,
501 // compare them for sorting purposes by indexing the covariate's raw data with both l and r
502 auto comp_op = [&](size_t const &l, size_t const &r) { return std::less<double>{}(covariates(l, feature_index_), covariates(r, feature_index_)); };
503 std::stable_sort(feature_sort_indices_.begin(), feature_sort_indices_.end(), comp_op);
504 }
505
506 private:
507 std::vector<data_size_t> feature_sort_indices_;
508 int32_t feature_index_;
509};
510
513 public:
514 FeaturePresortRootContainer(Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types) {
515 num_features_ = covariates.cols();
516 feature_presort_.resize(num_features_);
517 for (int i = 0; i < num_features_; i++) {
518 feature_presort_[i].reset(new FeaturePresortRoot(covariates, i, feature_types[i]));
519 }
520 }
521
523
524 FeaturePresortRoot* GetFeaturePresort(int feature_num) {return feature_presort_[feature_num].get(); }
525
526 private:
527 std::vector<std::unique_ptr<FeaturePresortRoot>> feature_presort_;
528 int num_features_;
529};
530
542 public:
543 FeaturePresortPartition(FeaturePresortRoot* feature_presort_root, Eigen::MatrixXd& covariates, int32_t feature_index, FeatureType feature_type) {
544 // Unpack all feature details
545 feature_index_ = feature_index;
546 feature_type_ = feature_type;
547 num_obs_ = covariates.rows();
548 feature_sort_indices_ = feature_presort_root->feature_sort_indices_;
549
550 // Initialize new tree to root
551 data_size_t node_offset = 0;
552 node_offset_sizes_.emplace_back(node_offset, num_obs_);
553 }
554
556
558 void SplitFeature(Eigen::MatrixXd& covariates, int32_t node_id, int32_t feature_index, TreeSplit& split);
559
561 void SplitFeatureNumeric(Eigen::MatrixXd& covariates, int32_t node_id, int32_t feature_index, double split_value);
562
564 void SplitFeatureCategorical(Eigen::MatrixXd& covariates, int32_t node_id, int32_t feature_index, std::vector<std::uint32_t> const& category_list);
565
567 data_size_t NodeBegin(int32_t node_id) {return node_offset_sizes_[node_id].Begin();}
568
570 data_size_t NodeEnd(int32_t node_id) {return node_offset_sizes_[node_id].End();}
571
573 data_size_t NodeSize(int32_t node_id) {return node_offset_sizes_[node_id].Size();}
574
576 std::vector<data_size_t> NodeIndices(int node_id);
577
579 data_size_t SortIndex(data_size_t j) {return feature_sort_indices_[j];}
580
582 FeatureType GetFeatureType() {return feature_type_;}
583
585 void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper* sample_node_mapper);
586
588 std::vector<data_size_t> feature_sort_indices_;
589 private:
591 void AddLeftRightNodes(data_size_t left_node_begin, data_size_t left_node_size, data_size_t right_node_begin, data_size_t right_node_size);
592
594 std::vector<NodeOffsetSize> node_offset_sizes_;
595 int32_t feature_index_;
596 FeatureType feature_type_;
597 data_size_t num_obs_;
598};
599
602 public:
603 SortedNodeSampleTracker(FeaturePresortRootContainer* feature_presort_root_container, Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types) {
604 num_features_ = covariates.cols();
605 feature_partitions_.resize(num_features_);
606 FeaturePresortRoot* feature_presort_root;
607 for (int i = 0; i < num_features_; i++) {
608 feature_presort_root = feature_presort_root_container->GetFeaturePresort(i);
609 feature_partitions_[i].reset(new FeaturePresortPartition(feature_presort_root, covariates, i, feature_types[i]));
610 }
611 }
612
614 void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, TreeSplit& split) {
615 for (int i = 0; i < num_features_; i++) {
616 feature_partitions_[i]->SplitFeature(covariates, node_id, feature_split, split);
617 }
618 }
619
621 void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, double split_value) {
622 for (int i = 0; i < num_features_; i++) {
623 feature_partitions_[i]->SplitFeatureNumeric(covariates, node_id, feature_split, split_value);
624 }
625 }
626
628 void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, std::vector<std::uint32_t> const& category_list) {
629 for (int i = 0; i < num_features_; i++) {
630 feature_partitions_[i]->SplitFeatureCategorical(covariates, node_id, feature_split, category_list);
631 }
632 }
633
635 data_size_t NodeBegin(int node_id, int feature_index) {
636 return feature_partitions_[feature_index]->NodeBegin(node_id);
637 }
638
640 data_size_t NodeEnd(int node_id, int feature_index) {
641 return feature_partitions_[feature_index]->NodeEnd(node_id);
642 }
643
645 data_size_t NodeSize(int node_id, int feature_index) {
646 return feature_partitions_[feature_index]->NodeSize(node_id);
647 }
648
649 std::vector<data_size_t>::iterator NodeBeginIterator(int node_id, int feature_index) {
650 data_size_t node_begin = NodeBegin(node_id, feature_index);
651 auto begin_iter = feature_partitions_[feature_index]->feature_sort_indices_.begin();
652 return begin_iter + node_begin;
653 }
654
655 std::vector<data_size_t>::iterator NodeEndIterator(int node_id, int feature_index) {
656 data_size_t node_end = NodeEnd(node_id, feature_index);
657 auto begin_iter = feature_partitions_[feature_index]->feature_sort_indices_.begin();
658 return begin_iter + node_end;
659 }
660
662 std::vector<data_size_t> NodeIndices(int node_id, int feature_index) {
663 return feature_partitions_[feature_index]->NodeIndices(node_id);
664 }
665
667 data_size_t SortIndex(data_size_t j, int feature_index) {return feature_partitions_[feature_index]->SortIndex(j); }
668
670 void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper* sample_node_mapper, int feature_index = 0) {
671 feature_partitions_[feature_index]->UpdateObservationMapping(node_id, tree_id, sample_node_mapper);
672 }
673
674 private:
675 std::vector<std::unique_ptr<FeaturePresortPartition>> feature_partitions_;
676 int num_features_;
677};
678
679} // namespace StochTree
680
681#endif // STOCHTREE_PARTITION_TRACKER_H_
Internal wrapper around Eigen::VectorXd interface for univariate floating point data....
Definition data.h:194
Data structure that tracks pre-sorted feature values through a tree's split lifecycle.
Definition partition_tracker.h:541
void SplitFeature(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, TreeSplit &split)
Split numeric / ordered categorical feature and update sort indices.
data_size_t NodeEnd(int32_t node_id)
End position of node indexed by node_id.
Definition partition_tracker.h:570
std::vector< data_size_t > NodeIndices(int node_id)
Data indices for a given node.
std::vector< data_size_t > feature_sort_indices_
Feature sort indices.
Definition partition_tracker.h:588
data_size_t NodeSize(int32_t node_id)
Size (in observations) of node indexed by node_id.
Definition partition_tracker.h:573
FeatureType GetFeatureType()
Feature type.
Definition partition_tracker.h:582
data_size_t SortIndex(data_size_t j)
Feature sort index j.
Definition partition_tracker.h:579
data_size_t NodeBegin(int32_t node_id)
Start position of node indexed by node_id.
Definition partition_tracker.h:567
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
void SplitFeatureNumeric(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, double split_value)
Split numeric / ordered categorical feature and update sort indices.
void SplitFeatureCategorical(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, std::vector< std::uint32_t > const &category_list)
Split unordered categorical feature and update sort indices.
Container class for FeaturePresortRoot objects stored for every feature in a dataset.
Definition partition_tracker.h:512
Data structure for presorting a feature by its values.
Definition partition_tracker.h:480
Mapping nodes to the indices they contain.
Definition partition_tracker.h:236
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
int RightNode(int node_id)
Right child of node_id.
int Parent(int node_id)
Parent node_id.
data_size_t NodeEnd(int node_id)
One past the last index of data points contained in node_id.
std::vector< data_size_t > indices_
Data indices.
Definition partition_tracker.h:286
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
bool RightNodeIsLeaf(int node_id)
Whether node_id's right child is a leaf.
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
data_size_t NodeSize(int node_id)
Number of data points contained in node_id.
std::vector< data_size_t > NodeIndices(int node_id)
Data indices for a given node.
void ReconstituteFromTree(Tree &tree, ForestDataset &dataset)
Reconstitute a tree partition tracker from root based on a tree.
bool IsLeaf(int node_id)
Whether node_id is a leaf.
void PruneNodeToLeaf(int node_id)
Convert a (currently split) node to a leaf.
bool LeftNodeIsLeaf(int node_id)
Whether node_id's left child is a leaf.
int LeftNode(int node_id)
Left child of node_id.
bool IsValidNode(int node_id)
Whether node_id is a valid node.
data_size_t NodeBegin(int node_id)
First index of data points contained in node_id.
API for loading and accessing data used to sample tree ensembles The covariates / bases / weights use...
Definition data.h:272
"Superclass" wrapper around tracking data structures for forest sampling algorithms
Definition partition_tracker.h:50
ForestTracker(Eigen::MatrixXd &covariates, std::vector< FeatureType > &feature_types, int num_trees, int num_observations)
Construct a new ForestTracker object.
Tracking cutpoints available at a given node.
Definition partition_tracker.h:443
Class storing sample-node map for each tree in an ensemble.
Definition partition_tracker.h:167
Class storing sample-prediction map for each tree in an ensemble.
Definition partition_tracker.h:126
Data structure for tracking observations through a tree partition with each feature pre-sorted.
Definition partition_tracker.h:601
data_size_t NodeSize(int node_id, int feature_index)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:645
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper, int feature_index=0)
Update SampleNodeMapper for all the observations in node_id.
Definition partition_tracker.h:670
std::vector< data_size_t > NodeIndices(int node_id, int feature_index)
Data indices for a given node.
Definition partition_tracker.h:662
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
Definition partition_tracker.h:621
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
Definition partition_tracker.h:614
data_size_t SortIndex(data_size_t j, int feature_index)
Feature sort index j for feature_index.
Definition partition_tracker.h:667
data_size_t NodeBegin(int node_id, int feature_index)
First index of data points contained in node_id.
Definition partition_tracker.h:635
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
Definition partition_tracker.h:628
data_size_t NodeEnd(int node_id, int feature_index)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:640
Class storing a "forest," or an ensemble of decision trees.
Definition ensemble.h:37
Representation of arbitrary tree split rules, including numeric split rules (X[,i] <= c) and categori...
Definition tree.h:927
bool SplitTrue(double fvalue)
Whether a given covariate value is True or False on the rule defined by a TreeSplit object.
Definition tree.h:959
Decision tree data structure.
Definition tree.h:69
std::vector< std::int32_t > const & GetLeaves() const
Get indices of all leaf nodes.
Definition tree.h:533
Mapping nodes to the indices they contain.
Definition partition_tracker.h:310
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
Definition partition_tracker.h:334
void UpdateObservationMapping(Tree *tree, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in tree.
Definition partition_tracker.h:421
int RightNode(int tree_id, int node_id)
Right child of node_id.
Definition partition_tracker.h:406
bool IsLeaf(int tree_id, int node_id)
Whether node_id is a leaf.
Definition partition_tracker.h:349
bool RightNodeIsLeaf(int tree_id, int node_id)
Whether node_id's right child is a leaf.
Definition partition_tracker.h:364
bool IsValidNode(int tree_id, int node_id)
Whether node_id is a valid node.
Definition partition_tracker.h:354
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
Definition partition_tracker.h:416
data_size_t NodeBegin(int tree_id, int node_id)
First index of data points contained in node_id.
Definition partition_tracker.h:369
int NumTrees()
Number of trees.
Definition partition_tracker.h:431
int LeftNode(int tree_id, int node_id)
Left child of node_id.
Definition partition_tracker.h:401
void ResetTreeToRoot(int tree_id, data_size_t n)
Convert a tree to root.
Definition partition_tracker.h:339
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
Definition partition_tracker.h:324
data_size_t NodeSize(int tree_id, int node_id)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:391
bool LeftNodeIsLeaf(int tree_id, int node_id)
Whether node_id's left child is a leaf.
Definition partition_tracker.h:359
int Parent(int tree_id, int node_id)
Parent node_id.
Definition partition_tracker.h:396
void ReconstituteFromForest(TreeEnsemble &forest, ForestDataset &dataset)
Reconstruct the node sample tracker based on the splits in a forest.
data_size_t NodeEnd(int tree_id, int node_id)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:374
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
Definition partition_tracker.h:329
FeatureUnsortedPartition * GetFeaturePartition(int i)
Number of trees.
Definition partition_tracker.h:434
std::vector< data_size_t > TreeNodeIndices(int tree_id, int node_id)
Data indices for a given node.
Definition partition_tracker.h:411
void PruneTreeNodeToLeaf(int tree_id, int node_id)
Convert a (currently split) node to a leaf.
Definition partition_tracker.h:344
Definition category_tracker.h:40