StochTree 0.0.1
Loading...
Searching...
No Matches
cutpoint_candidates.h
1
39#ifndef STOCHTREE_CUTPOINT_CANDIDATES_H_
40#define STOCHTREE_CUTPOINT_CANDIDATES_H_
41
42#include <stochtree/meta.h>
43#include <stochtree/partition_tracker.h>
44
45#include <tuple>
46
47namespace StochTree {
48
53 public:
54 FeatureCutpointGrid(int cutpoint_grid_size) : node_stride_begin_{}, node_stride_length_{}, cutpoint_grid_size_{cutpoint_grid_size} {}
55
57
59 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types);
60
62 void CalculateStridesNumeric(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
63
65 void CalculateStridesOrderedCategorical(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
66
68 void CalculateStridesUnorderedCategorical(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
69
71 int32_t NumCutpoints() {return node_stride_begin_.size();}
72
74 int32_t BinStartIndex(int i) {return node_stride_begin_.at(i);}
75
77 int32_t BinLength(int i) {return node_stride_length_.at(i);}
78
80 int32_t BinEndIndex(int i) {return node_stride_begin_.at(i) + node_stride_length_.at(i);}
81
83 double CutpointValue(int i) {return cutpoint_values_.at(i);}
84
88 std::vector<std::uint32_t> CutpointVector(int i) {
89 std::vector<std::uint32_t> out;
90 int bin_stop = i + 1;
91 for (int j = 0; j < bin_stop; j++) {
92 out.push_back(static_cast<std::uint32_t>(cutpoint_values_.at(j)));
93 }
94 return out;
95 }
96
97 private:
99 std::vector<data_size_t> node_stride_begin_;
100 std::vector<data_size_t> node_stride_length_;
101 std::vector<double> cutpoint_values_;
102 int32_t cutpoint_grid_size_;
103
105 void EnumerateNumericCutpointsDeduplication(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
106
108 void ScanNumericCutpoints(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
109};
110
113 public:
114 CutpointGridContainer(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, int cutpoint_grid_size) {
115 num_features_ = covariates.cols();
116 feature_cutpoint_grid_.resize(num_features_);
117 for (int i = 0; i < num_features_; i++) {
118 feature_cutpoint_grid_[i].reset(new FeatureCutpointGrid(cutpoint_grid_size));
119 }
120 cutpoint_grid_size_ = cutpoint_grid_size;
121 }
122
124
125 void Reset(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, int cutpoint_grid_size) {
126 num_features_ = covariates.cols();
127 feature_cutpoint_grid_.resize(num_features_);
128 for (int i = 0; i < num_features_; i++) {
129 feature_cutpoint_grid_[i].reset(new FeatureCutpointGrid(cutpoint_grid_size));
130 }
131 cutpoint_grid_size_ = cutpoint_grid_size;
132 }
133
135 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types) {
136 feature_cutpoint_grid_[feature_index]->CalculateStrides(covariates, residuals, feature_node_sort_tracker, node_id, node_begin, node_end, feature_index, feature_types);
137 }
138
140 int32_t CutpointGridSize() {return cutpoint_grid_size_;}
141
143 int32_t NumCutpoints(int feature_index) {return feature_cutpoint_grid_[feature_index]->NumCutpoints();}
144
146 int32_t BinStartIndex(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->BinStartIndex(i);}
147
149 int32_t BinLength(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->BinLength(i);}
150
152 int32_t BinEndIndex(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->BinEndIndex(i);}
153
155 double CutpointValue(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->CutpointValue(i);}
156
160 std::vector<std::uint32_t> CutpointVector(int i, int feature_index) {
161 return feature_cutpoint_grid_[feature_index]->CutpointVector(i);
162 }
163
164 FeatureCutpointGrid* GetFeatureCutpointGrid(int feature_num) {return feature_cutpoint_grid_[feature_num].get(); }
165
166 private:
167 std::vector<std::unique_ptr<FeatureCutpointGrid>> feature_cutpoint_grid_;
168 int num_features_;
169 int cutpoint_grid_size_;
170};
171
174 public:
175 NodeCutpointTracker(int cutpoint_grid_size) : node_stride_begin_{}, node_stride_length_{}, cutpoint_grid_size_{cutpoint_grid_size}, nodes_enumerated_{} {}
176
178
180 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
181
183 void CalculateStridesNumeric(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
184
186 void CalculateStridesCategorical(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
187
189 int32_t NumCutpoints() {return node_stride_begin_.size();}
190
192 bool NodeCutpointEvaluated(int32_t node_id) {
193 return (std::find(nodes_enumerated_.begin(), nodes_enumerated_.end(), node_id) != nodes_enumerated_.end());
194 }
195
197 int32_t CurrentNodeEvaluated() {return current_node_;}
198
200 std::vector<data_size_t> node_stride_begin_;
201 std::vector<data_size_t> node_stride_length_;
202
203 private:
204 int32_t cutpoint_grid_size_;
205 std::vector<int32_t> nodes_enumerated_;
206 int32_t current_node_;
207};
208
209} // namespace StochTree
210
211#endif // STOCHTREE_CUTPOINT_CANDIDATES_H_
Container class for FeatureCutpointGrid objects stored for every feature in a dataset.
Definition cutpoint_candidates.h:112
double CutpointValue(int i, int feature_index)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:155
int32_t BinStartIndex(int i, int feature_index)
Beginning index of bin i.
Definition cutpoint_candidates.h:146
int32_t BinLength(int i, int feature_index)
Size of bin i.
Definition cutpoint_candidates.h:149
int32_t NumCutpoints(int feature_index)
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:143
std::vector< std::uint32_t > CutpointVector(int i, int feature_index)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:160
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
Definition cutpoint_candidates.h:135
int32_t CutpointGridSize()
Max size of cutpoint grid.
Definition cutpoint_candidates.h:140
int32_t BinEndIndex(int i, int feature_index)
End index of bin i (its beginning index plus its length).
Definition cutpoint_candidates.h:152
Computing and tracking cutpoints available for a given feature at a given node. Store cutpoint bins in...
Definition cutpoint_candidates.h:52
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:71
void CalculateStridesOrderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
std::vector< std::uint32_t > CutpointVector(int i)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:88
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
int32_t BinLength(int i)
Size of bin i.
Definition cutpoint_candidates.h:77
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
void CalculateStridesUnorderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t BinEndIndex(int i)
End index of bin i (its beginning index plus its length).
Definition cutpoint_candidates.h:80
int32_t BinStartIndex(int i)
Beginning index of bin i.
Definition cutpoint_candidates.h:74
double CutpointValue(int i)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:83
Computing and tracking cutpoints available for a given feature at a given node.
Definition cutpoint_candidates.h:173
void CalculateStridesCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:189
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
int32_t CurrentNodeEvaluated()
Node id of the node that has been most recently evaluated.
Definition cutpoint_candidates.h:197
bool NodeCutpointEvaluated(int32_t node_id)
Whether a cutpoint grid has been enumerated for a given node.
Definition cutpoint_candidates.h:192
std::vector< data_size_t > node_stride_begin_
Vectors of node stride starting points and stride lengths.
Definition cutpoint_candidates.h:200
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Calculate strides.
Data structure for tracking observations through a tree partition with each feature pre-sorted.
Definition partition_tracker.h:601
Definition category_tracker.h:40