25#ifndef STOCHTREE_PARTITION_TRACKER_H_ 
   26#define STOCHTREE_PARTITION_TRACKER_H_ 
   28#include <stochtree/data.h> 
   29#include <stochtree/ensemble.h> 
   30#include <stochtree/log.h> 
   31#include <stochtree/openmp_utils.h> 
   32#include <stochtree/tree.h> 
   40class SampleNodeMapper;
 
   41class SamplePredMapper;
 
   42class UnsortedNodeSampleTracker;
 
   43class SortedNodeSampleTracker;
 
   44class FeaturePresortRootContainer;
 
   57  ForestTracker(Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types, 
int num_trees, 
int num_observations);
 
   60  void AssignAllSamplesToRoot();
 
   61  void AssignAllSamplesToRoot(int32_t tree_num);
 
   62  void AssignAllSamplesToConstantPrediction(
double value);
 
   63  void AssignAllSamplesToConstantPrediction(int32_t tree_num, 
double value);
 
   67  void ResetRoot(Eigen::MatrixXd& covariates, std::vector<FeatureType>& feature_types, int32_t tree_num);
 
   68  void AddSplit(Eigen::MatrixXd& covariates, 
TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, 
bool keep_sorted = 
false, 
int num_threads = -1);
 
   69  void RemoveSplit(Eigen::MatrixXd& covariates, 
Tree* tree, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, 
bool keep_sorted = 
false);
 
   70  double GetSamplePrediction(data_size_t sample_id);
 
   71  double GetTreeSamplePrediction(data_size_t sample_id, 
int tree_id);
 
   72  void UpdateVarWeightsFromInternalPredictions(
ForestDataset& dataset);
 
   73  void SetSamplePrediction(data_size_t sample_id, 
double value);
 
   74  void SetTreeSamplePrediction(data_size_t sample_id, 
int tree_id, 
double value);
 
   75  void SyncPredictions();
 
   76  data_size_t GetNodeId(
int observation_num, 
int tree_num);
 
   77  data_size_t UnsortedNodeBegin(
int tree_id, 
int node_id);
 
   78  data_size_t UnsortedNodeEnd(
int tree_id, 
int node_id);
 
   79  data_size_t UnsortedNodeSize(
int tree_id, 
int node_id);
 
   80  data_size_t SortedNodeBegin(
int node_id, 
int feature_id);
 
   81  data_size_t SortedNodeEnd(
int node_id, 
int feature_id);
 
   82  data_size_t SortedNodeSize(
int node_id, 
int feature_id);
 
   83  std::vector<data_size_t>::iterator UnsortedNodeBeginIterator(
int tree_id, 
int node_id);
 
   84  std::vector<data_size_t>::iterator UnsortedNodeEndIterator(
int tree_id, 
int node_id);
 
   85  std::vector<data_size_t>::iterator SortedNodeBeginIterator(
int node_id, 
int feature_id);
 
   86  std::vector<data_size_t>::iterator SortedNodeEndIterator(
int node_id, 
int feature_id);
 
   88  SampleNodeMapper* GetSampleNodeMapper() {
return sample_node_mapper_.get();}
 
   89  UnsortedNodeSampleTracker* GetUnsortedNodeSampleTracker() {
return unsorted_node_sample_tracker_.get();}
 
   90  SortedNodeSampleTracker* GetSortedNodeSampleTracker() {
return sorted_node_sample_tracker_.get();}
 
   91  int GetNumObservations() {
return num_observations_;}
 
   92  int GetNumTrees() {
return num_trees_;}
 
   93  int GetNumFeatures() {
return num_features_;}
 
   94  bool Initialized() {
return initialized_;}
 
   98  std::vector<double> sum_predictions_;
 
  100  std::unique_ptr<SamplePredMapper> sample_pred_mapper_;
 
  102  std::unique_ptr<SampleNodeMapper> sample_node_mapper_;
 
  106  std::unique_ptr<UnsortedNodeSampleTracker> unsorted_node_sample_tracker_;
 
  110  std::unique_ptr<FeaturePresortRootContainer> presort_container_;
 
  111  std::unique_ptr<SortedNodeSampleTracker> sorted_node_sample_tracker_;
 
  112  std::vector<FeatureType> feature_types_;
 
  114  int num_observations_;
 
  116  bool initialized_{
false};
 
  118  void UpdatePredictionsInternal(TreeEnsemble* ensemble, Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis);
 
  119  void UpdatePredictionsInternal(TreeEnsemble* ensemble, Eigen::MatrixXd& covariates);
 
  120  void UpdateSampleTrackersInternal(TreeEnsemble& forest, Eigen::MatrixXd& covariates, Eigen::MatrixXd& basis);
 
  121  void UpdateSampleTrackersInternal(TreeEnsemble& forest, Eigen::MatrixXd& covariates);
 
  122  void UpdateSampleTrackersResidualInternalBasis(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual, 
bool is_mean_model);
 
  123  void UpdateSampleTrackersResidualInternalNoBasis(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual, 
bool is_mean_model);
 
 
  130    num_trees_ = num_trees;
 
  131    num_observations_ = num_observations;
 
  133    tree_preds_.resize(num_trees_);
 
  134    for (
int j = 0; j < num_trees_; j++) {
 
  135      tree_preds_[j].resize(num_observations_);
 
  139  inline double GetPred(data_size_t sample_id, 
int tree_id) {
 
  140    CHECK_LT(sample_id, num_observations_);
 
  141    CHECK_LT(tree_id, num_trees_);
 
  142    return tree_preds_[tree_id][sample_id];
 
  145  inline void SetPred(data_size_t sample_id, 
int tree_id, 
double value) {
 
  146    CHECK_LT(sample_id, num_observations_);
 
  147    CHECK_LT(tree_id, num_trees_);
 
  148    tree_preds_[tree_id][sample_id] = value;
 
  151  inline int NumTrees() {
return num_trees_;}
 
  153  inline int NumObservations() {
return num_observations_;}
 
  155  inline void AssignAllSamplesToConstantPrediction(
int tree_id, 
double value) {
 
  156    for (data_size_t i = 0; i < num_observations_; i++) {
 
  157      tree_preds_[tree_id][i] = value;
 
  162  std::vector<std::vector<double>> tree_preds_;
 
  164  data_size_t num_observations_;
 
 
  171    num_trees_ = num_trees;
 
  172    num_observations_ = num_observations;
 
  174    tree_observation_indices_.resize(num_trees_);
 
  175    for (
int j = 0; j < num_trees_; j++) {
 
  176      tree_observation_indices_[j].resize(num_observations_);
 
  181    num_trees_ = other.NumTrees();
 
  182    num_observations_ = other.NumObservations();
 
  184    tree_observation_indices_.resize(num_trees_);
 
  185    for (
int j = 0; j < num_trees_; j++) {
 
  186      tree_observation_indices_[j].resize(num_observations_);
 
  187      for (
int i = 0; i < num_observations_; i++) {
 
  188        tree_observation_indices_[j][i] = other.GetNodeId(i, j);
 
  193  void AddSplit(Eigen::MatrixXd& covariates, 
TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id) {
 
  194    CHECK_EQ(num_observations_, covariates.rows());
 
  196    for (
int i = 0; i < num_observations_; i++) {
 
  197      if (tree_observation_indices_[tree_id][i] == split_node_id) {
 
  198        auto fvalue = covariates(i, split_feature);
 
  200          tree_observation_indices_[tree_id][i] = left_node_id;
 
  202          tree_observation_indices_[tree_id][i] = right_node_id;
 
  208  inline data_size_t GetNodeId(data_size_t sample_id, 
int tree_id) {
 
  209    CHECK_LT(sample_id, num_observations_);
 
  210    CHECK_LT(tree_id, num_trees_);
 
  211    return tree_observation_indices_[tree_id][sample_id];
 
  214  inline void SetNodeId(data_size_t sample_id, 
int tree_id, 
int node_id) {
 
  215    CHECK_LT(sample_id, num_observations_);
 
  216    CHECK_LT(tree_id, num_trees_);
 
  217    tree_observation_indices_[tree_id][sample_id] = node_id;
 
  220  inline int NumTrees() {
return num_trees_;}
 
  222  inline int NumObservations() {
return num_observations_;}
 
  224  inline void AssignAllSamplesToRoot(
int tree_id) {
 
  225    for (data_size_t i = 0; i < num_observations_; i++) {
 
  226      tree_observation_indices_[tree_id][i] = 0;
 
  231  std::vector<std::vector<int>> tree_observation_indices_;
 
  233  data_size_t num_observations_;
 
 
  245  void PartitionNode(Eigen::MatrixXd& covariates, 
int node_id, 
int left_node_id, 
int right_node_id, 
int feature_split, 
TreeSplit& split);
 
  248  void PartitionNode(Eigen::MatrixXd& covariates, 
int node_id, 
int left_node_id, 
int right_node_id, 
int feature_split, 
double split_value);
 
  251  void PartitionNode(Eigen::MatrixXd& covariates, 
int node_id, 
int left_node_id, 
int right_node_id, 
int feature_split, std::vector<std::uint32_t> 
const& category_list);
 
  297  std::vector<data_size_t> node_begin_;
 
  298  std::vector<data_size_t> node_length_;
 
  299  std::vector<int32_t> parent_nodes_;
 
  300  std::vector<int32_t> left_nodes_;
 
  301  std::vector<int32_t> right_nodes_;
 
  302  int num_nodes_, num_deleted_nodes_;
 
  303  std::vector<int> deleted_nodes_;
 
  306  void ExpandNodeTrackingVectors(
int node_id, 
int left_node_id, 
int right_node_id, data_size_t node_start_idx, data_size_t num_left, data_size_t num_right);
 
  307  void ConvertLeafParentToLeaf(
int node_id);
 
 
  314    feature_partitions_.resize(num_trees);
 
  315    num_trees_ = num_trees;
 
  316    for (
int i = 0; i < num_trees; i++) {
 
  325  void PartitionTreeNode(Eigen::MatrixXd& covariates, 
int tree_id, 
int node_id, 
int left_node_id, 
int right_node_id, 
int feature_split, 
TreeSplit& split) {
 
  326    return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, split);
 
 
  330  void PartitionTreeNode(Eigen::MatrixXd& covariates, 
int tree_id, 
int node_id, 
int left_node_id, 
int right_node_id, 
int feature_split, 
double split_value) {
 
  331    return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, split_value);
 
 
  335  void PartitionTreeNode(Eigen::MatrixXd& covariates, 
int tree_id, 
int node_id, 
int left_node_id, 
int right_node_id, 
int feature_split, std::vector<std::uint32_t> 
const& category_list) {
 
  336    return feature_partitions_[tree_id]->PartitionNode(covariates, node_id, left_node_id, right_node_id, feature_split, category_list);
 
 
  346    return feature_partitions_[tree_id]->PruneNodeToLeaf(node_id);
 
 
  351    return feature_partitions_[tree_id]->IsLeaf(node_id);
 
 
  356    return feature_partitions_[tree_id]->IsValidNode(node_id);
 
 
  361    return feature_partitions_[tree_id]->LeftNodeIsLeaf(node_id);
 
 
  366    return feature_partitions_[tree_id]->RightNodeIsLeaf(node_id);
 
 
  371    return feature_partitions_[tree_id]->NodeBegin(node_id);
 
 
  375  data_size_t 
NodeEnd(
int tree_id, 
int node_id) {
 
  376    return feature_partitions_[tree_id]->NodeEnd(node_id);
 
 
  379  std::vector<data_size_t>::iterator NodeBeginIterator(
int tree_id, 
int node_id) {
 
  380    data_size_t node_begin = feature_partitions_[tree_id]->NodeBegin(node_id);
 
  381    auto begin_iter = feature_partitions_[tree_id]->indices_.begin();
 
  382    return begin_iter + node_begin;
 
  385  std::vector<data_size_t>::iterator NodeEndIterator(
int tree_id, 
int node_id) {
 
  386    int node_end = feature_partitions_[tree_id]->NodeEnd(node_id);
 
  387    auto begin_iter = feature_partitions_[tree_id]->indices_.begin();
 
  388    return begin_iter + node_end;
 
  393    return feature_partitions_[tree_id]->NodeSize(node_id);
 
 
  398    return feature_partitions_[tree_id]->Parent(node_id);
 
 
  403    return feature_partitions_[tree_id]->LeftNode(node_id);
 
 
  408    return feature_partitions_[tree_id]->RightNode(node_id);
 
 
  413    return feature_partitions_[tree_id]->NodeIndices(node_id);
 
 
  418    feature_partitions_[tree_id]->UpdateObservationMapping(node_id, tree_id, sample_node_mapper);
 
 
  423    std::vector<int> leaves = tree->
GetLeaves();
 
  425    for (
int i = 0; i < leaves.size(); i++) {
 
 
  439  std::vector<std::unique_ptr<FeatureUnsortedPartition>> feature_partitions_;
 
 
  446  NodeOffsetSize(data_size_t node_offset, data_size_t node_size) : node_begin_{node_offset}, node_size_{node_size}, presorted_{
false} {
 
  447    node_end_ = node_begin_ + node_size_;
 
  452  void SetSorted() {presorted_ = 
true;}
 
  454  bool IsSorted() {
return presorted_;}
 
  456  data_size_t Begin() {
return node_begin_;}
 
  458  data_size_t End() {
return node_end_;}
 
  460  data_size_t Size() {
return node_size_;}
 
  463  data_size_t node_begin_;
 
  464  data_size_t node_size_;
 
  465  data_size_t node_end_;
 
 
  484  FeaturePresortRoot(Eigen::MatrixXd& covariates, int32_t feature_index, FeatureType feature_type) {
 
  485    feature_index_ = feature_index;
 
  486    ArgsortRoot(covariates);
 
  491  void ArgsortRoot(Eigen::MatrixXd& covariates) {
 
  492    data_size_t num_obs = covariates.rows();
 
  495    if (feature_sort_indices_.size() != num_obs){
 
  496      feature_sort_indices_.resize(num_obs, 0);
 
  498    std::iota(feature_sort_indices_.begin(), feature_sort_indices_.end(), 0);
 
  503    auto comp_op = [&](
size_t const &l, 
size_t const &r) { 
return std::less<double>{}(covariates(l, feature_index_), covariates(r, feature_index_)); };
 
  504    std::stable_sort(feature_sort_indices_.begin(), feature_sort_indices_.end(), comp_op);
 
  508  std::vector<data_size_t> feature_sort_indices_;
 
  509  int32_t feature_index_;
 
 
  516    num_features_ = covariates.cols();
 
  517    feature_presort_.resize(num_features_);
 
  518    for (
int i = 0; i < num_features_; i++) {
 
  525  FeaturePresortRoot* GetFeaturePresort(
int feature_num) {
return feature_presort_[feature_num].get(); }
 
  528  std::vector<std::unique_ptr<FeaturePresortRoot>> feature_presort_;
 
 
  546    feature_index_ = feature_index;
 
  547    feature_type_ = feature_type;
 
  548    num_obs_ = covariates.rows();
 
  552    data_size_t node_offset = 0;
 
  553    node_offset_sizes_.emplace_back(node_offset, num_obs_);
 
  562  void SplitFeatureNumeric(Eigen::MatrixXd& covariates, int32_t node_id, int32_t feature_index, 
double split_value);
 
  565  void SplitFeatureCategorical(Eigen::MatrixXd& covariates, int32_t node_id, int32_t feature_index, std::vector<std::uint32_t> 
const& category_list);
 
  568  data_size_t 
NodeBegin(int32_t node_id) {
return node_offset_sizes_[node_id].Begin();}
 
  571  data_size_t 
NodeEnd(int32_t node_id) {
return node_offset_sizes_[node_id].End();}
 
  574  data_size_t 
NodeSize(int32_t node_id) {
return node_offset_sizes_[node_id].Size();}
 
  592  void AddLeftRightNodes(data_size_t left_node_begin, data_size_t left_node_size, data_size_t right_node_begin, data_size_t right_node_size);
 
  595  std::vector<NodeOffsetSize> node_offset_sizes_;
 
  596  int32_t feature_index_;
 
  597  FeatureType feature_type_;
 
  598  data_size_t num_obs_;
 
 
  605    num_features_ = covariates.cols();
 
  606    feature_partitions_.resize(num_features_);
 
  608    for (
int i = 0; i < num_features_; i++) {
 
  609      feature_presort_root = feature_presort_root_container->GetFeaturePresort(i);
 
  610      feature_partitions_[i].reset(
new FeaturePresortPartition(feature_presort_root, covariates, i, feature_types[i]));
 
  615  void PartitionNode(Eigen::MatrixXd& covariates, 
int node_id, 
int feature_split, 
TreeSplit& split, 
int num_threads = -1) {
 
  616    StochTree::ParallelFor(0, num_features_, num_threads, [&](
int i) {
 
  617      feature_partitions_[i]->SplitFeature(covariates, node_id, feature_split, split);
 
 
  622  void PartitionNode(Eigen::MatrixXd& covariates, 
int node_id, 
int feature_split, 
double split_value, 
int num_threads = -1) {
 
  623    StochTree::ParallelFor(0, num_features_, num_threads, [&](
int i) {
 
  624      feature_partitions_[i]->SplitFeatureNumeric(covariates, node_id, feature_split, split_value);
 
 
  629  void PartitionNode(Eigen::MatrixXd& covariates, 
int node_id, 
int feature_split, std::vector<std::uint32_t> 
const& category_list, 
int num_threads = -1) {
 
  630    StochTree::ParallelFor(0, num_features_, num_threads, [&](
int i) {
 
  631      feature_partitions_[i]->SplitFeatureCategorical(covariates, node_id, feature_split, category_list);
 
 
  637    return feature_partitions_[feature_index]->NodeBegin(node_id);
 
 
  641  data_size_t 
NodeEnd(
int node_id, 
int feature_index) {
 
  642    return feature_partitions_[feature_index]->NodeEnd(node_id);
 
 
  646  data_size_t 
NodeSize(
int node_id, 
int feature_index) {
 
  647    return feature_partitions_[feature_index]->NodeSize(node_id);
 
 
  650  std::vector<data_size_t>::iterator NodeBeginIterator(
int node_id, 
int feature_index) {
 
  651    data_size_t node_begin = 
NodeBegin(node_id, feature_index);
 
  652    auto begin_iter = feature_partitions_[feature_index]->feature_sort_indices_.begin();
 
  653    return begin_iter + node_begin;
 
  656  std::vector<data_size_t>::iterator NodeEndIterator(
int node_id, 
int feature_index) {
 
  657    data_size_t node_end = 
NodeEnd(node_id, feature_index);
 
  658    auto begin_iter = feature_partitions_[feature_index]->feature_sort_indices_.begin();
 
  659    return begin_iter + node_end;
 
  663  std::vector<data_size_t> 
NodeIndices(
int node_id, 
int feature_index) {
 
  664    return feature_partitions_[feature_index]->NodeIndices(node_id);
 
 
  668  data_size_t 
SortIndex(data_size_t j, 
int feature_index) {
return feature_partitions_[feature_index]->SortIndex(j); }
 
  672    feature_partitions_[feature_index]->UpdateObservationMapping(node_id, tree_id, sample_node_mapper);
 
 
  676  std::vector<std::unique_ptr<FeaturePresortPartition>> feature_partitions_;
 
 
Internal wrapper around Eigen::VectorXd interface for univariate floating point data....
Definition data.h:193
 
Data structure that tracks pre-sorted feature values through a tree's split lifecycle.
Definition partition_tracker.h:542
 
void SplitFeature(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, TreeSplit &split)
Split numeric / ordered categorical feature and update sort indices.
 
data_size_t NodeEnd(int32_t node_id)
End position of node indexed by node_id.
Definition partition_tracker.h:571
 
std::vector< data_size_t > NodeIndices(int node_id)
Data indices for a given node.
 
std::vector< data_size_t > feature_sort_indices_
Feature sort indices.
Definition partition_tracker.h:589
 
data_size_t NodeSize(int32_t node_id)
Size (in observations) of node indexed by node_id.
Definition partition_tracker.h:574
 
FeatureType GetFeatureType()
Feature type.
Definition partition_tracker.h:583
 
data_size_t SortIndex(data_size_t j)
Feature sort index j.
Definition partition_tracker.h:580
 
data_size_t NodeBegin(int32_t node_id)
Start position of node indexed by node_id.
Definition partition_tracker.h:568
 
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
 
void SplitFeatureNumeric(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, double split_value)
Split numeric / ordered categorical feature and update sort indices.
 
void SplitFeatureCategorical(Eigen::MatrixXd &covariates, int32_t node_id, int32_t feature_index, std::vector< std::uint32_t > const &category_list)
Split unordered categorical feature and update sort indices.
 
Container class for FeaturePresortRoot objects stored for every feature in a dataset.
Definition partition_tracker.h:513
 
Data structure for presorting a feature by its values.
Definition partition_tracker.h:481
 
Mapping nodes to the indices they contain.
Definition partition_tracker.h:237
 
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
 
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
 
int RightNode(int node_id)
Right child of node_id.
 
int Parent(int node_id)
Parent node_id.
 
data_size_t NodeEnd(int node_id)
One past the last index of data points contained in node_id.
 
std::vector< data_size_t > indices_
Data indices.
Definition partition_tracker.h:287
 
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
 
bool RightNodeIsLeaf(int node_id)
Whether node_id's right child is a leaf.
 
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
 
data_size_t NodeSize(int node_id)
Number of data points contained in node_id.
 
std::vector< data_size_t > NodeIndices(int node_id)
Data indices for a given node.
 
void ReconstituteFromTree(Tree &tree, ForestDataset &dataset)
Reconstitute a tree partition tracker from root based on a tree.
 
bool IsLeaf(int node_id)
Whether node_id is a leaf.
 
void PruneNodeToLeaf(int node_id)
Convert a (currently split) node to a leaf.
 
bool LeftNodeIsLeaf(int node_id)
Whether node_id's left child is a leaf.
 
int LeftNode(int node_id)
Left child of node_id.
 
bool IsValidNode(int node_id)
Whether node_id is a valid node.
 
data_size_t NodeBegin(int node_id)
First index of data points contained in node_id.
 
API for loading and accessing data used to sample tree ensembles The covariates / bases / weights use...
Definition data.h:271
 
"Superclass" wrapper around tracking data structures for forest sampling algorithms
Definition partition_tracker.h:47
 
ForestTracker(Eigen::MatrixXd &covariates, std::vector< FeatureType > &feature_types, int num_trees, int num_observations)
Construct a new ForestTracker object.
 
Tracking cutpoints available at a given node.
Definition partition_tracker.h:444
 
Class storing sample-node map for each tree in an ensemble.
Definition partition_tracker.h:168
 
Class storing sample-prediction map for each tree in an ensemble.
Definition partition_tracker.h:127
 
Data structure for tracking observations through a tree partition with each feature pre-sorted.
Definition partition_tracker.h:602
 
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, TreeSplit &split, int num_threads=-1)
Partition a node based on a new split rule.
Definition partition_tracker.h:615
 
data_size_t NodeSize(int node_id, int feature_index)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:646
 
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper, int feature_index=0)
Update SampleNodeMapper for all the observations in node_id.
Definition partition_tracker.h:671
 
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, double split_value, int num_threads=-1)
Partition a node based on a new split rule.
Definition partition_tracker.h:622
 
std::vector< data_size_t > NodeIndices(int node_id, int feature_index)
Data indices for a given node.
Definition partition_tracker.h:663
 
data_size_t SortIndex(data_size_t j, int feature_index)
Feature sort index j for feature_index.
Definition partition_tracker.h:668
 
data_size_t NodeBegin(int node_id, int feature_index)
First index of data points contained in node_id.
Definition partition_tracker.h:636
 
void PartitionNode(Eigen::MatrixXd &covariates, int node_id, int feature_split, std::vector< std::uint32_t > const &category_list, int num_threads=-1)
Partition a node based on a new split rule.
Definition partition_tracker.h:629
 
data_size_t NodeEnd(int node_id, int feature_index)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:641
 
Class storing a "forest," or an ensemble of decision trees.
Definition ensemble.h:31
 
Representation of arbitrary tree split rules, including numeric split rules (X[,i] <= c) and categori...
Definition tree.h:958
 
bool SplitTrue(double fvalue)
Whether a given covariate value is True or False on the rule defined by a TreeSplit object.
Definition tree.h:990
 
Decision tree data structure.
Definition tree.h:66
 
std::vector< std::int32_t > const & GetLeaves() const
Get indices of all leaf nodes.
Definition tree.h:564
 
Mapping nodes to the indices they contain.
Definition partition_tracker.h:311
 
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, std::vector< std::uint32_t > const &category_list)
Partition a node based on a new split rule.
Definition partition_tracker.h:335
 
void UpdateObservationMapping(Tree *tree, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in tree.
Definition partition_tracker.h:422
 
int RightNode(int tree_id, int node_id)
Right child of node_id.
Definition partition_tracker.h:407
 
bool IsLeaf(int tree_id, int node_id)
Whether node_id is a leaf.
Definition partition_tracker.h:350
 
bool RightNodeIsLeaf(int tree_id, int node_id)
Whether node_id's right child is a leaf.
Definition partition_tracker.h:365
 
bool IsValidNode(int tree_id, int node_id)
Whether node_id is a valid node.
Definition partition_tracker.h:355
 
void UpdateObservationMapping(int node_id, int tree_id, SampleNodeMapper *sample_node_mapper)
Update SampleNodeMapper for all the observations in node_id.
Definition partition_tracker.h:417
 
data_size_t NodeBegin(int tree_id, int node_id)
First index of data points contained in node_id.
Definition partition_tracker.h:370
 
int NumTrees()
Number of trees.
Definition partition_tracker.h:432
 
int LeftNode(int tree_id, int node_id)
Left child of node_id.
Definition partition_tracker.h:402
 
void ResetTreeToRoot(int tree_id, data_size_t n)
Convert a tree to root.
Definition partition_tracker.h:340
 
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, TreeSplit &split)
Partition a node based on a new split rule.
Definition partition_tracker.h:325
 
data_size_t NodeSize(int tree_id, int node_id)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:392
 
bool LeftNodeIsLeaf(int tree_id, int node_id)
Whether node_id's left child is a leaf.
Definition partition_tracker.h:360
 
int Parent(int tree_id, int node_id)
Parent node_id.
Definition partition_tracker.h:397
 
void ReconstituteFromForest(TreeEnsemble &forest, ForestDataset &dataset)
Reconstruct the node sample tracker based on the splits in a forest.
 
data_size_t NodeEnd(int tree_id, int node_id)
One past the last index of data points contained in node_id.
Definition partition_tracker.h:375
 
void PartitionTreeNode(Eigen::MatrixXd &covariates, int tree_id, int node_id, int left_node_id, int right_node_id, int feature_split, double split_value)
Partition a node based on a new split rule.
Definition partition_tracker.h:330
 
FeatureUnsortedPartition * GetFeaturePartition(int i)
Return a pointer to the feature partition tracking tree i.
Definition partition_tracker.h:435
 
std::vector< data_size_t > TreeNodeIndices(int tree_id, int node_id)
Data indices for a given node.
Definition partition_tracker.h:412
 
void PruneTreeNodeToLeaf(int tree_id, int node_id)
Convert a (currently split) node to a leaf.
Definition partition_tracker.h:345
 
Definition category_tracker.h:36