39#ifndef STOCHTREE_CUTPOINT_CANDIDATES_H_
40#define STOCHTREE_CUTPOINT_CANDIDATES_H_
42#include <stochtree/meta.h>
43#include <stochtree/partition_tracker.h>
54 FeatureCutpointGrid(
int cutpoint_grid_size) : node_stride_begin_{}, node_stride_length_{}, cutpoint_grid_size_{cutpoint_grid_size} {}
59 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types);
77 int32_t
BinLength(
int i) {
return node_stride_length_.at(i);}
80 int32_t
BinEndIndex(
int i) {
return node_stride_begin_.at(i) + node_stride_length_.at(i);}
89 std::vector<std::uint32_t> out;
91 for (
int j = 0; j < bin_stop; j++) {
92 out.push_back(
static_cast<std::uint32_t
>(cutpoint_values_.at(j)));
99 std::vector<data_size_t> node_stride_begin_;
100 std::vector<data_size_t> node_stride_length_;
101 std::vector<double> cutpoint_values_;
102 int32_t cutpoint_grid_size_;
105 void EnumerateNumericCutpointsDeduplication(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
108 void ScanNumericCutpoints(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
114 CutpointGridContainer(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
int cutpoint_grid_size) {
115 num_features_ = covariates.cols();
116 feature_cutpoint_grid_.resize(num_features_);
117 for (
int i = 0; i < num_features_; i++) {
120 cutpoint_grid_size_ = cutpoint_grid_size;
125 void Reset(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
int cutpoint_grid_size) {
126 num_features_ = covariates.cols();
127 feature_cutpoint_grid_.resize(num_features_);
128 for (
int i = 0; i < num_features_; i++) {
131 cutpoint_grid_size_ = cutpoint_grid_size;
135 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types) {
136 feature_cutpoint_grid_[feature_index]->CalculateStrides(covariates, residuals, feature_node_sort_tracker, node_id, node_begin, node_end, feature_index, feature_types);
143 int32_t
NumCutpoints(
int feature_index) {
return feature_cutpoint_grid_[feature_index]->NumCutpoints();}
146 int32_t
BinStartIndex(
int i,
int feature_index) {
return feature_cutpoint_grid_[feature_index]->BinStartIndex(i);}
149 int32_t
BinLength(
int i,
int feature_index) {
return feature_cutpoint_grid_[feature_index]->BinLength(i);}
152 int32_t
BinEndIndex(
int i,
int feature_index) {
return feature_cutpoint_grid_[feature_index]->BinEndIndex(i);}
155 double CutpointValue(
int i,
int feature_index) {
return feature_cutpoint_grid_[feature_index]->CutpointValue(i);}
161 return feature_cutpoint_grid_[feature_index]->CutpointVector(i);
164 FeatureCutpointGrid* GetFeatureCutpointGrid(
int feature_num) {
return feature_cutpoint_grid_[feature_num].get(); }
167 std::vector<std::unique_ptr<FeatureCutpointGrid>> feature_cutpoint_grid_;
169 int cutpoint_grid_size_;
180 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
193 return (std::find(nodes_enumerated_.begin(), nodes_enumerated_.end(), node_id) != nodes_enumerated_.end());
201 std::vector<data_size_t> node_stride_length_;
204 int32_t cutpoint_grid_size_;
205 std::vector<int32_t> nodes_enumerated_;
206 int32_t current_node_;
Container class for FeatureCutpointGrid objects stored for every feature in a dataset.
Definition cutpoint_candidates.h:112
double CutpointValue(int i, int feature_index)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:155
int32_t BinStartIndex(int i, int feature_index)
Beginning index of bin i.
Definition cutpoint_candidates.h:146
int32_t BinLength(int i, int feature_index)
Size of bin i.
Definition cutpoint_candidates.h:149
int32_t NumCutpoints(int feature_index)
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:143
std::vector< std::uint32_t > CutpointVector(int i, int feature_index)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:160
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
Definition cutpoint_candidates.h:135
int32_t CutpointGridSize()
Max size of cutpoint grid.
Definition cutpoint_candidates.h:140
int32_t BinEndIndex(int i, int feature_index)
End index of bin i (start index plus bin length).
Definition cutpoint_candidates.h:152
Computing and tracking cutpoints available for a given feature at a given node. Store cutpoint bins in...
Definition cutpoint_candidates.h:52
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:71
void CalculateStridesOrderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
std::vector< std::uint32_t > CutpointVector(int i)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:88
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
int32_t BinLength(int i)
Size of bin i.
Definition cutpoint_candidates.h:77
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
void CalculateStridesUnorderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t BinEndIndex(int i)
End index of bin i (start index plus bin length).
Definition cutpoint_candidates.h:80
int32_t BinStartIndex(int i)
Beginning index of bin i.
Definition cutpoint_candidates.h:74
double CutpointValue(int i)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:83
Computing and tracking cutpoints available for a given feature at a given node.
Definition cutpoint_candidates.h:173
void CalculateStridesCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:189
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
int32_t CurrentNodeEvaluated()
Node id of the node that has been most recently evaluated.
Definition cutpoint_candidates.h:197
bool NodeCutpointEvaluated(int32_t node_id)
Whether a cutpoint grid has been enumerated for a given node.
Definition cutpoint_candidates.h:192
std::vector< data_size_t > node_stride_begin_
Vectors of node stride starting points and stride lengths.
Definition cutpoint_candidates.h:200
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Calculate strides.
Data structure for tracking observations through a tree partition with each feature pre-sorted.
Definition partition_tracker.h:601
Definition category_tracker.h:40