39#ifndef STOCHTREE_CUTPOINT_CANDIDATES_H_
40#define STOCHTREE_CUTPOINT_CANDIDATES_H_
42#include <stochtree/meta.h>
43#include <stochtree/partition_tracker.h>
/*!
 * \brief Build an empty cutpoint grid for one feature.
 * \param cutpoint_grid_size Value stored in cutpoint_grid_size_.
 *
 * Both stride vectors start out empty; nothing is computed here.
 */
52 FeatureCutpointGrid(int cutpoint_grid_size)
    : node_stride_begin_(),
      node_stride_length_(),
      cutpoint_grid_size_(cutpoint_grid_size) {}
/*!
 * \brief Calculate strides.
 *
 * Declaration only; the definition is not part of this listing.
 * \param covariates Covariate matrix (passed by non-const reference).
 * \param residuals Residual vector (passed by non-const reference).
 * \param feature_node_sort_tracker Pointer to the sorted node sample tracker.
 * \param node_id Id of the node being processed.
 * \param node_begin First index of the node's range.
 * \param node_end End index of the node's range. NOTE(review): inclusive vs. exclusive is not visible here — confirm.
 * \param feature_index Index of the feature being processed.
 * \param feature_types Feature types. NOTE(review): presumably one entry per feature, used to pick the
 *        numeric / ordered / unordered categorical variant — confirm against the definition.
 */
57 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types);
/*!
 * \brief Size of bin i.
 * \param i Bin index; the lookup is bounds-checked, so an invalid index throws std::out_of_range.
 */
75 int32_t BinLength(int i) {
  const auto& stride_lengths = node_stride_length_;
  return stride_lengths.at(i);
}
/*!
 * \brief End index of bin i, computed as the bin's start index plus its length.
 * \param i Bin index; both lookups are bounds-checked, so an invalid index throws std::out_of_range.
 */
78 int32_t BinEndIndex(int i) {
  const auto bin_begin = node_stride_begin_.at(i);
  const auto bin_length = node_stride_length_.at(i);
  return bin_begin + bin_length;
}
87 std::vector<std::uint32_t> out;
89 for (
int j = 0; j < bin_stop; j++) {
90 out.push_back(
static_cast<std::uint32_t
>(cutpoint_values_.at(j)));
// Per-bin start index; BinEndIndex(i) reads entry i of this vector.
97 std::vector<data_size_t> node_stride_begin_;
// Per-bin length; BinLength(i) returns entry i of this vector.
98 std::vector<data_size_t> node_stride_length_;
// Cutpoint values, one double per entry; CutpointVector casts them to std::uint32_t.
99 std::vector<double> cutpoint_values_;
// Grid size supplied to the constructor.
100 int32_t cutpoint_grid_size_;
/*!
 * Declaration only; the definition is not part of this listing.
 * NOTE(review): the name suggests this enumerates cutpoints for a numeric feature while collapsing
 * duplicate values — confirm against the implementation.
 * Takes the same arguments as ScanNumericCutpoints, including node_size in addition to node_begin / node_end.
 */
103 void EnumerateNumericCutpointsDeduplication(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
/*!
 * Declaration only; the definition is not part of this listing.
 * NOTE(review): the name suggests a single pass over a numeric feature's sorted values to place
 * cutpoints — confirm against the implementation.
 * Takes the same arguments as EnumerateNumericCutpointsDeduplication.
 */
106 void ScanNumericCutpoints(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
112 CutpointGridContainer(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
int cutpoint_grid_size) {
113 num_features_ = covariates.cols();
114 feature_cutpoint_grid_.resize(num_features_);
115 for (
int i = 0; i < num_features_; i++) {
118 cutpoint_grid_size_ = cutpoint_grid_size;
123 void Reset(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
int cutpoint_grid_size) {
124 num_features_ = covariates.cols();
125 feature_cutpoint_grid_.resize(num_features_);
126 for (
int i = 0; i < num_features_; i++) {
129 cutpoint_grid_size_ = cutpoint_grid_size;
133 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types) {
134 feature_cutpoint_grid_[feature_index]->CalculateStrides(covariates, residuals, feature_node_sort_tracker, node_id, node_begin, node_end, feature_index, feature_types);
/*!
 * \brief Number of potential cutpoints enumerated for one feature.
 * \param feature_index Index into the per-feature grid vector (not bounds-checked).
 */
141 int32_t NumCutpoints(int feature_index) {
  const auto& feature_grid = feature_cutpoint_grid_[feature_index];
  return feature_grid->NumCutpoints();
}
/*!
 * \brief Beginning index of bin i for one feature.
 * \param i Bin index, forwarded to the feature's grid.
 * \param feature_index Index into the per-feature grid vector (not bounds-checked).
 */
144 int32_t BinStartIndex(int i, int feature_index) {
  const auto& feature_grid = feature_cutpoint_grid_[feature_index];
  return feature_grid->BinStartIndex(i);
}
/*!
 * \brief Size of bin i for one feature.
 * \param i Bin index, forwarded to the feature's grid.
 * \param feature_index Index into the per-feature grid vector (not bounds-checked).
 */
147 int32_t BinLength(int i, int feature_index) {
  const auto& feature_grid = feature_cutpoint_grid_[feature_index];
  return feature_grid->BinLength(i);
}
/*!
 * \brief End index of bin i for one feature.
 * \param i Bin index, forwarded to the feature's grid.
 * \param feature_index Index into the per-feature grid vector (not bounds-checked).
 */
150 int32_t BinEndIndex(int i, int feature_index) {
  const auto& feature_grid = feature_cutpoint_grid_[feature_index];
  return feature_grid->BinEndIndex(i);
}
/*!
 * \brief Value of the upper-bound (cutpoint) implied by bin i for one feature.
 * \param i Bin index, forwarded to the feature's grid.
 * \param feature_index Index into the per-feature grid vector (not bounds-checked).
 */
153 double CutpointValue(int i, int feature_index) {
  const auto& feature_grid = feature_cutpoint_grid_[feature_index];
  return feature_grid->CutpointValue(i);
}
159 return feature_cutpoint_grid_[feature_index]->CutpointVector(i);
/*!
 * \brief Non-owning pointer to the cutpoint grid stored for one feature.
 * \param feature_num Index into the per-feature grid vector (not bounds-checked).
 *
 * The container keeps ownership through its std::unique_ptr; callers must not delete the result.
 */
162 FeatureCutpointGrid* GetFeatureCutpointGrid(int feature_num) {
  const auto& owned_grid = feature_cutpoint_grid_[feature_num];
  return owned_grid.get();
}
// One owned FeatureCutpointGrid per feature, indexed by feature_index in the accessors.
165 std::vector<std::unique_ptr<FeatureCutpointGrid>> feature_cutpoint_grid_;
// Grid size supplied to the constructor / Reset.
167 int cutpoint_grid_size_;
/*!
 * \brief Calculate strides.
 *
 * Declaration only; the definition is not part of this listing. Unlike the
 * FeatureCutpointGrid overload, this one takes no feature_types argument.
 */
178 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals,
SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
191 return (std::find(nodes_enumerated_.begin(), nodes_enumerated_.end(), node_id) != nodes_enumerated_.end());
// Stride lengths (paired with node_stride_begin_, the stride starting points).
199 std::vector<data_size_t> node_stride_length_;
// NOTE(review): presumably the maximum size of the cutpoint grid — confirm where it is set.
202 int32_t cutpoint_grid_size_;
// Node ids searched (linear std::find) to decide whether a node's cutpoints were already enumerated.
203 std::vector<int32_t> nodes_enumerated_;
// NOTE(review): presumably the id of the most recently evaluated node (see CurrentNodeEvaluated) — confirm.
204 int32_t current_node_;
Container class for FeatureCutpointGrid objects stored for every feature in a dataset.
Definition cutpoint_candidates.h:110
double CutpointValue(int i, int feature_index)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:153
int32_t BinStartIndex(int i, int feature_index)
Beginning index of bin i.
Definition cutpoint_candidates.h:144
int32_t BinLength(int i, int feature_index)
Size of bin i.
Definition cutpoint_candidates.h:147
int32_t NumCutpoints(int feature_index)
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:141
std::vector< std::uint32_t > CutpointVector(int i, int feature_index)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:158
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
Definition cutpoint_candidates.h:133
int32_t CutpointGridSize()
Max size of cutpoint grid.
Definition cutpoint_candidates.h:138
int32_t BinEndIndex(int i, int feature_index)
End index of bin i.
Definition cutpoint_candidates.h:150
Computing and tracking cutpoints available for a given feature at a given node. Store cutpoint bins in...
Definition cutpoint_candidates.h:50
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:69
void CalculateStridesOrderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
std::vector< std::uint32_t > CutpointVector(int i)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:86
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
int32_t BinLength(int i)
Size of bin i.
Definition cutpoint_candidates.h:75
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
void CalculateStridesUnorderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t BinEndIndex(int i)
End index of bin i (start index plus bin length).
Definition cutpoint_candidates.h:78
int32_t BinStartIndex(int i)
Beginning index of bin i.
Definition cutpoint_candidates.h:72
double CutpointValue(int i)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:81
Computing and tracking cutpoints available for a given feature at a given node.
Definition cutpoint_candidates.h:171
void CalculateStridesCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:187
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
int32_t CurrentNodeEvaluated()
Node id of the node that has been most recently evaluated.
Definition cutpoint_candidates.h:195
bool NodeCutpointEvaluated(int32_t node_id)
Whether a cutpoint grid has been enumerated for a given node.
Definition cutpoint_candidates.h:190
std::vector< data_size_t > node_stride_begin_
Vectors of node stride starting points and stride lengths.
Definition cutpoint_candidates.h:198
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Calculate strides.
Data structure for tracking observations through a tree partition with each feature pre-sorted.
Definition partition_tracker.h:602
Definition category_tracker.h:36