StochTree 0.1.1
Loading...
Searching...
No Matches
cutpoint_candidates.h
1
39#ifndef STOCHTREE_CUTPOINT_CANDIDATES_H_
40#define STOCHTREE_CUTPOINT_CANDIDATES_H_
41
42#include <stochtree/meta.h>
43#include <stochtree/partition_tracker.h>
44
45namespace StochTree {
46
51 public:
52 FeatureCutpointGrid(int cutpoint_grid_size) : node_stride_begin_{}, node_stride_length_{}, cutpoint_grid_size_{cutpoint_grid_size} {}
53
55
57 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types);
58
60 void CalculateStridesNumeric(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
61
63 void CalculateStridesOrderedCategorical(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
64
66 void CalculateStridesUnorderedCategorical(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
67
69 int32_t NumCutpoints() {return node_stride_begin_.size();}
70
72 int32_t BinStartIndex(int i) {return node_stride_begin_.at(i);}
73
75 int32_t BinLength(int i) {return node_stride_length_.at(i);}
76
78 int32_t BinEndIndex(int i) {return node_stride_begin_.at(i) + node_stride_length_.at(i);}
79
81 double CutpointValue(int i) {return cutpoint_values_.at(i);}
82
86 std::vector<std::uint32_t> CutpointVector(int i) {
87 std::vector<std::uint32_t> out;
88 int bin_stop = i + 1;
89 for (int j = 0; j < bin_stop; j++) {
90 out.push_back(static_cast<std::uint32_t>(cutpoint_values_.at(j)));
91 }
92 return out;
93 }
94
95 private:
97 std::vector<data_size_t> node_stride_begin_;
98 std::vector<data_size_t> node_stride_length_;
99 std::vector<double> cutpoint_values_;
100 int32_t cutpoint_grid_size_;
101
103 void EnumerateNumericCutpointsDeduplication(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
104
106 void ScanNumericCutpoints(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, data_size_t node_size, int32_t feature_index);
107};
108
111 public:
112 CutpointGridContainer(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, int cutpoint_grid_size) {
113 num_features_ = covariates.cols();
114 feature_cutpoint_grid_.resize(num_features_);
115 for (int i = 0; i < num_features_; i++) {
116 feature_cutpoint_grid_[i].reset(new FeatureCutpointGrid(cutpoint_grid_size));
117 }
118 cutpoint_grid_size_ = cutpoint_grid_size;
119 }
120
122
123 void Reset(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, int cutpoint_grid_size) {
124 num_features_ = covariates.cols();
125 feature_cutpoint_grid_.resize(num_features_);
126 for (int i = 0; i < num_features_; i++) {
127 feature_cutpoint_grid_[i].reset(new FeatureCutpointGrid(cutpoint_grid_size));
128 }
129 cutpoint_grid_size_ = cutpoint_grid_size;
130 }
131
133 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector<FeatureType>& feature_types) {
134 feature_cutpoint_grid_[feature_index]->CalculateStrides(covariates, residuals, feature_node_sort_tracker, node_id, node_begin, node_end, feature_index, feature_types);
135 }
136
138 int32_t CutpointGridSize() {return cutpoint_grid_size_;}
139
141 int32_t NumCutpoints(int feature_index) {return feature_cutpoint_grid_[feature_index]->NumCutpoints();}
142
144 int32_t BinStartIndex(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->BinStartIndex(i);}
145
147 int32_t BinLength(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->BinLength(i);}
148
150 int32_t BinEndIndex(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->BinEndIndex(i);}
151
153 double CutpointValue(int i, int feature_index) {return feature_cutpoint_grid_[feature_index]->CutpointValue(i);}
154
158 std::vector<std::uint32_t> CutpointVector(int i, int feature_index) {
159 return feature_cutpoint_grid_[feature_index]->CutpointVector(i);
160 }
161
162 FeatureCutpointGrid* GetFeatureCutpointGrid(int feature_num) {return feature_cutpoint_grid_[feature_num].get(); }
163
164 private:
165 std::vector<std::unique_ptr<FeatureCutpointGrid>> feature_cutpoint_grid_;
166 int num_features_;
167 int cutpoint_grid_size_;
168};
169
172 public:
173 NodeCutpointTracker(int cutpoint_grid_size) : node_stride_begin_{}, node_stride_length_{}, cutpoint_grid_size_{cutpoint_grid_size}, nodes_enumerated_{} {}
174
176
178 void CalculateStrides(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
179
181 void CalculateStridesNumeric(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
182
184 void CalculateStridesCategorical(Eigen::MatrixXd& covariates, Eigen::VectorXd& residuals, SortedNodeSampleTracker* feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index);
185
187 int32_t NumCutpoints() {return node_stride_begin_.size();}
188
190 bool NodeCutpointEvaluated(int32_t node_id) {
191 return (std::find(nodes_enumerated_.begin(), nodes_enumerated_.end(), node_id) != nodes_enumerated_.end());
192 }
193
195 int32_t CurrentNodeEvaluated() {return current_node_;}
196
198 std::vector<data_size_t> node_stride_begin_;
199 std::vector<data_size_t> node_stride_length_;
200
201 private:
202 int32_t cutpoint_grid_size_;
203 std::vector<int32_t> nodes_enumerated_;
204 int32_t current_node_;
205};
206
207} // namespace StochTree
208
209#endif // STOCHTREE_CUTPOINT_CANDIDATES_H_
Container class for FeatureCutpointGrid objects stored for every feature in a dataset.
Definition cutpoint_candidates.h:110
double CutpointValue(int i, int feature_index)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:153
int32_t BinStartIndex(int i, int feature_index)
Beginning index of bin i.
Definition cutpoint_candidates.h:144
int32_t BinLength(int i, int feature_index)
Size of bin i.
Definition cutpoint_candidates.h:147
int32_t NumCutpoints(int feature_index)
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:141
std::vector< std::uint32_t > CutpointVector(int i, int feature_index)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:158
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
Definition cutpoint_candidates.h:133
int32_t CutpointGridSize()
Max size of cutpoint grid.
Definition cutpoint_candidates.h:138
int32_t BinEndIndex(int i, int feature_index)
End index of bin i (its beginning index plus its length).
Definition cutpoint_candidates.h:150
Computing and tracking cutpoints available for a given feature at a given node. Store cutpoint bins in...
Definition cutpoint_candidates.h:50
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:69
void CalculateStridesOrderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
std::vector< std::uint32_t > CutpointVector(int i)
Vector of cutpoint values up to and including bin i. Helper function for converting categorical split ...
Definition cutpoint_candidates.h:86
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index, std::vector< FeatureType > &feature_types)
Calculate strides.
int32_t BinLength(int i)
Size of bin i.
Definition cutpoint_candidates.h:75
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
void CalculateStridesUnorderedCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t BinEndIndex(int i)
End index of bin i (its beginning index plus its length).
Definition cutpoint_candidates.h:78
int32_t BinStartIndex(int i)
Beginning index of bin i.
Definition cutpoint_candidates.h:72
double CutpointValue(int i)
Value of the upper-bound (cutpoint) implied by bin i.
Definition cutpoint_candidates.h:81
Computing and tracking cutpoints available for a given feature at a given node.
Definition cutpoint_candidates.h:171
void CalculateStridesCategorical(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split unordered categorical feature and update sort indices.
int32_t NumCutpoints()
Number of potential cutpoints enumerated.
Definition cutpoint_candidates.h:187
void CalculateStridesNumeric(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Split numeric / ordered categorical feature and update sort indices.
int32_t CurrentNodeEvaluated()
Node id of the node that has been most recently evaluated.
Definition cutpoint_candidates.h:195
bool NodeCutpointEvaluated(int32_t node_id)
Whether a cutpoint grid has been enumerated for a given node.
Definition cutpoint_candidates.h:190
std::vector< data_size_t > node_stride_begin_
Vectors of node stride starting points and stride lengths.
Definition cutpoint_candidates.h:198
void CalculateStrides(Eigen::MatrixXd &covariates, Eigen::VectorXd &residuals, SortedNodeSampleTracker *feature_node_sort_tracker, int32_t node_id, data_size_t node_begin, data_size_t node_end, int32_t feature_index)
Calculate strides.
Data structure for tracking observations through a tree partition with each feature pre-sorted.
Definition partition_tracker.h:602
Definition category_tracker.h:36